misc spelling/wording/grammar fixes
[libav.git] / libswscale / swscale.c
CommitLineData
fe8054c0 1/*
d026b45e
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with FFmpeg; if not, write to the Free Software
b19bcbaa 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d026b45e 19 *
807e0c66
LA
20 * the C code (not assembly, mmx, ...) of this file can be used
21 * under the LGPL license too
d026b45e 22 */
783e9cc9 23
28bf81c9 24/*
9990e426 25 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
caeaabe7 26 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
e09d12f4 27 {BGR,RGB}{1,4,8,15,16} support dithering
6a4970ab 28
e09d12f4
MN
29 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30 YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31 x -> x
32 YUV9 -> YV12
33 YUV9/YV12 -> Y800
34 Y800 -> YUV9/YV12
b935781b
MN
35 BGR24 -> BGR32 & RGB24 -> RGB32
36 BGR32 -> BGR24 & RGB32 -> RGB24
4bb3fa5e 37 BGR15 -> BGR16
b935781b
MN
38*/
39
6a4970ab 40/*
a6f6b237 41tested special converters (most are tested actually, but I did not write it down ...)
e09d12f4 42 YV12 -> BGR16
b935781b 43 YV12 -> YV12
4bb3fa5e 44 BGR15 -> BGR16
1e1c4fe9 45 BGR16 -> BGR16
e09d12f4 46 YVU9 -> YV12
b935781b
MN
47
48untested special converters
86bdf3fd 49 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
1e1c4fe9
MN
50 YV12/I420 -> YV12/I420
51 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
b935781b
MN
52 BGR24 -> BGR32 & RGB24 -> RGB32
53 BGR32 -> BGR24 & RGB32 -> RGB24
ec22603f 54 BGR24 -> YV12
28bf81c9
MN
55*/
56
d63a2cb1 57#define _SVID_SOURCE //needed for MAP_ANONYMOUS
d3f41512 58#include <inttypes.h>
dda87e9f 59#include <string.h>
077ea8a7 60#include <math.h>
c1b0bfb4 61#include <stdio.h>
171d7d78 62#include <unistd.h>
b2d374c9 63#include "config.h"
81b7c056 64#include <assert.h>
38d5c282
AJ
65#ifdef HAVE_SYS_MMAN_H
66#include <sys/mman.h>
113ef149
RD
67#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
68#define MAP_ANONYMOUS MAP_ANON
69#endif
38d5c282 70#endif
d604bab9 71#include "swscale.h"
5427e242 72#include "swscale_internal.h"
37079906 73#include "rgb2rgb.h"
83da2c6f
DB
74#include "libavutil/x86_cpu.h"
75#include "libavutil/bswap.h"
0d9f3d85 76
b3e03fa7
SS
/**
 * Report the libswscale version.
 *
 * @return the value of the LIBSWSCALE_VERSION_INT constant this file was
 *         compiled with
 */
unsigned swscale_version(void)
{
    return LIBSWSCALE_VERSION_INT;
}
81
541c4eb9 82#undef MOVNTQ
7d7f78b5 83#undef PAVGB
d3f41512 84
783e9cc9 85//#undef HAVE_MMX2
7f56a527 86//#define HAVE_3DNOW
d3f41512 87//#undef HAVE_MMX
783e9cc9 88//#undef ARCH_X86
2ba1bff0 89//#define WORDS_BIGENDIAN
d604bab9 90#define DITHER1XBPP
d3f41512 91
ac6a2e45
MN
92#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
93
1e621b18 94#define RET 0xC3 //near return opcode for X86
c1b0bfb4 95
28bf81c9
MN
96#ifdef M_PI
97#define PI M_PI
98#else
99#define PI 3.14159265358979323846
100#endif
c1b0bfb4 101
9d9de37d
IP
/*
 * Non-zero when pixel format x is one of the formats enumerated below
 * (the formats accepted as scaler input).
 * Note: x is evaluated once per comparison, so it must be free of side effects.
 */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_YUV440P     \
    )
/*
 * Non-zero when pixel format x is one of the formats enumerated below
 * (the formats accepted as scaler output). Every format for which
 * isRGB() or isBGR() holds is included.
 * Note: x is evaluated several times, so it must be free of side effects.
 */
#define isSupportedOut(x)   (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || isRGB(x)                 \
        || isBGR(x)                 \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
    )
/*
 * Non-zero when pixel format x is PAL8, YUYV422, UYVY422 or any format for
 * which isRGB() / isBGR() holds.
 * Note: x is evaluated several times, so it must be free of side effects.
 */
#define isPacked(x)         (       \
           (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || isRGB(x)                 \
        || isBGR(x)                 \
    )
6ff0ad6b 154
6b79dbce 155#define RGB2YUV_SHIFT 15
7b5d7b9e
MN
156#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
157#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
158#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
159#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
160#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
161#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
162#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
163#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
164#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
6c7506de 165
0481412a
MN
166extern const int32_t Inverse_Table_6_9[8][4];
167
0f5d4aa8
MN
/*
 * RGB -> YUV conversion coefficients, one row of nine values per colorspace.
 *
 * Each row consists of three triples. In every row the first triple sums to
 * 1.0 and the second and third triples each sum to 0.0, i.e. they look like
 * the luma weights followed by the two chroma-difference weight sets.
 * NOTE(review): the per-column component order (G, B, R?) and the meaning of
 * the row index are not visible in this part of the file - confirm against
 * the code that reads the table.
 *
 * The SMPTE 240M row used to read "0.5 -0.116" (missing comma), which the
 * compiler folded into the single value 0.384; every following coefficient
 * was shifted one slot to the left and the last one silently became 0.
 */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
178
783e9cc9
MN
179/*
180NOTES
d604bab9 181Special versions: fast Y 1:1 scaling (no interpolation in y direction)
31190492 182
783e9cc9 183TODO
bd7c6fd5 184more intelligent misalignment avoidance for the horizontal scaler
c1b0bfb4
MN
185write special vertical cubic upscale version
186Optimize C code (yv12 / minmax)
ff7ba856 187add support for packed pixel yuv input & output
6ff0ad6b
MN
188add support for Y8 output
189optimize bgr24 & bgr32
ff7ba856 190add BGR4 output support
1e621b18 191write special BGR->BGR scaler
783e9cc9 192*/
31190492 193
9bde778e 194#if defined(ARCH_X86) && defined (CONFIG_GPL)
d334c7c2
RD
195DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
196DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
197DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
198DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
199DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
200DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
201DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
202DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
d604bab9 203
db7a2e0d
MC
204static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
205static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
206static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
207static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
d8fa3c54 208
0cb25594 209const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
221b804f
DB
210 0x0103010301030103LL,
211 0x0200020002000200LL,};
d8fa3c54 212
0cb25594 213const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
221b804f
DB
214 0x0602060206020602LL,
215 0x0004000400040004LL,};
d604bab9 216
d334c7c2
RD
217DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
218DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
219DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
220DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
221DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
222DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
d604bab9 223
5802683a
RD
224DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
225DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
226DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
99d2cb72 227
ac6a2e45 228#ifdef FAST_BGR2YV12
5802683a
RD
229DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
230DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
231DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
ac6a2e45 232#else
5802683a
RD
233DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
234DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
235DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
69796008 236#endif /* FAST_BGR2YV12 */
5802683a
RD
237DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
238DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
239DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
dfb09bd1
MN
240
241DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
242DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
243DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
244DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
245DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
246
247DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUV[2][4]) = {
248 {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
249 {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
250};
251
252DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
253
3d6a30d9 254#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */
783e9cc9
MN
255
256// clipping helper table for C implementations:
257static unsigned char clip_table[768];
258
d4e24275 259static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
6a4970ab 260
cf7d1c1a
MN
261extern const uint8_t dither_2x2_4[2][8];
262extern const uint8_t dither_2x2_8[2][8];
263extern const uint8_t dither_8x8_32[8][8];
264extern const uint8_t dither_8x8_73[8][8];
265extern const uint8_t dither_8x8_220[8][8];
5cebb24b 266
8055ede6 267const char *sws_format_name(enum PixelFormat format)
94c4def2 268{
e9e12f0e
LA
269 switch (format) {
270 case PIX_FMT_YUV420P:
271 return "yuv420p";
79973335
AJ
272 case PIX_FMT_YUVA420P:
273 return "yuva420p";
e9e12f0e
LA
274 case PIX_FMT_YUYV422:
275 return "yuyv422";
276 case PIX_FMT_RGB24:
277 return "rgb24";
278 case PIX_FMT_BGR24:
279 return "bgr24";
280 case PIX_FMT_YUV422P:
281 return "yuv422p";
282 case PIX_FMT_YUV444P:
283 return "yuv444p";
284 case PIX_FMT_RGB32:
285 return "rgb32";
286 case PIX_FMT_YUV410P:
287 return "yuv410p";
288 case PIX_FMT_YUV411P:
289 return "yuv411p";
290 case PIX_FMT_RGB565:
291 return "rgb565";
292 case PIX_FMT_RGB555:
293 return "rgb555";
4884b9e5
KS
294 case PIX_FMT_GRAY16BE:
295 return "gray16be";
296 case PIX_FMT_GRAY16LE:
297 return "gray16le";
e9e12f0e
LA
298 case PIX_FMT_GRAY8:
299 return "gray8";
300 case PIX_FMT_MONOWHITE:
301 return "mono white";
302 case PIX_FMT_MONOBLACK:
303 return "mono black";
304 case PIX_FMT_PAL8:
305 return "Palette";
306 case PIX_FMT_YUVJ420P:
307 return "yuvj420p";
308 case PIX_FMT_YUVJ422P:
309 return "yuvj422p";
310 case PIX_FMT_YUVJ444P:
311 return "yuvj444p";
312 case PIX_FMT_XVMC_MPEG2_MC:
313 return "xvmc_mpeg2_mc";
314 case PIX_FMT_XVMC_MPEG2_IDCT:
315 return "xvmc_mpeg2_idct";
316 case PIX_FMT_UYVY422:
317 return "uyvy422";
318 case PIX_FMT_UYYVYY411:
319 return "uyyvyy411";
320 case PIX_FMT_RGB32_1:
321 return "rgb32x";
322 case PIX_FMT_BGR32_1:
323 return "bgr32x";
324 case PIX_FMT_BGR32:
325 return "bgr32";
326 case PIX_FMT_BGR565:
327 return "bgr565";
328 case PIX_FMT_BGR555:
329 return "bgr555";
330 case PIX_FMT_BGR8:
331 return "bgr8";
332 case PIX_FMT_BGR4:
333 return "bgr4";
334 case PIX_FMT_BGR4_BYTE:
335 return "bgr4 byte";
336 case PIX_FMT_RGB8:
337 return "rgb8";
338 case PIX_FMT_RGB4:
339 return "rgb4";
340 case PIX_FMT_RGB4_BYTE:
341 return "rgb4 byte";
342 case PIX_FMT_NV12:
343 return "nv12";
344 case PIX_FMT_NV21:
345 return "nv21";
9ba7fe6d
346 case PIX_FMT_YUV440P:
347 return "yuv440p";
e9e12f0e
LA
348 default:
349 return "Unknown format";
350 }
94c4def2
LA
351}
352
5859233b 353static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
221b804f
DB
354 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
355 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
e3d2500f 356{
221b804f
DB
357 //FIXME Optimize (just quickly writen not opti..)
358 int i;
359 for (i=0; i<dstW; i++)
360 {
361 int val=1<<18;
362 int j;
363 for (j=0; j<lumFilterSize; j++)
364 val += lumSrc[j][i] * lumFilter[j];
365
366 dest[i]= av_clip_uint8(val>>19);
367 }
368
1b0a4572 369 if (uDest)
221b804f
DB
370 for (i=0; i<chrDstW; i++)
371 {
372 int u=1<<18;
373 int v=1<<18;
374 int j;
375 for (j=0; j<chrFilterSize; j++)
376 {
377 u += chrSrc[j][i] * chrFilter[j];
8b2fce0d 378 v += chrSrc[j][i + VOFW] * chrFilter[j];
221b804f
DB
379 }
380
381 uDest[i]= av_clip_uint8(u>>19);
382 vDest[i]= av_clip_uint8(v>>19);
383 }
e3d2500f
MN
384}
385
6118e52e 386static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
221b804f
DB
387 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
388 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
6118e52e 389{
221b804f
DB
390 //FIXME Optimize (just quickly writen not opti..)
391 int i;
392 for (i=0; i<dstW; i++)
393 {
394 int val=1<<18;
395 int j;
396 for (j=0; j<lumFilterSize; j++)
397 val += lumSrc[j][i] * lumFilter[j];
398
399 dest[i]= av_clip_uint8(val>>19);
400 }
401
1b0a4572 402 if (!uDest)
221b804f
DB
403 return;
404
405 if (dstFormat == PIX_FMT_NV12)
406 for (i=0; i<chrDstW; i++)
407 {
408 int u=1<<18;
409 int v=1<<18;
410 int j;
411 for (j=0; j<chrFilterSize; j++)
412 {
413 u += chrSrc[j][i] * chrFilter[j];
8b2fce0d 414 v += chrSrc[j][i + VOFW] * chrFilter[j];
221b804f
DB
415 }
416
417 uDest[2*i]= av_clip_uint8(u>>19);
418 uDest[2*i+1]= av_clip_uint8(v>>19);
419 }
420 else
421 for (i=0; i<chrDstW; i++)
422 {
423 int u=1<<18;
424 int v=1<<18;
425 int j;
426 for (j=0; j<chrFilterSize; j++)
427 {
428 u += chrSrc[j][i] * chrFilter[j];
8b2fce0d 429 v += chrSrc[j][i + VOFW] * chrFilter[j];
221b804f
DB
430 }
431
432 uDest[2*i]= av_clip_uint8(v>>19);
433 uDest[2*i+1]= av_clip_uint8(u>>19);
434 }
6118e52e 435}
46de8b73 436
/*
 * Opens a loop over pixel pairs (i = 0 .. dstW/2 - 1) and vertically filters
 * two luma samples (Y1, Y2) and one chroma pair (U, V) per iteration.
 * Accumulators start at 1<<18 so the final >>19 rounds to nearest.
 * The results are NOT clipped. The loop's opening brace is left unclosed;
 * the user of the macro must supply the closing '}'.
 * Declares r, g, b as (possibly unused) pointers to 'type'.
 * Expects i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc, chrFilter and
 * chrFilterSize to be in scope.
 */
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        type av_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;\
bdf397ba
MN
461
/*
 * Same as YSCALE_YUV_2_PACKEDX_NOCLIP_C, followed by clipping of Y1, Y2, U
 * and V to 0..255. The clipping branch is only entered when bit 8 of any of
 * the four values is set.
 * NOTE(review): values such as 512..767 or -512..-257 have bit 8 clear and
 * would skip the clipping - presumably the filter overshoot is assumed to
 * stay below that; confirm.
 * The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
        if ((Y1|Y2|U|V)&256)\
        {\
            if (Y1>255)   Y1=255; \
            else if (Y1<0)Y1=0;   \
            if (Y2>255)   Y2=255; \
            else if (Y2<0)Y2=0;   \
            if (U>255)    U=255;  \
            else if (U<0) U=0;    \
            if (V>255)    V=255;  \
            else if (V<0) V=0;    \
        }
6a4970ab 475
/*
 * Opens a loop over pixel pairs and vertically filters two luma samples into
 * 16-bit range (>>11 instead of >>19), then clips Y1 and Y2 to 0..65535 when
 * bit 16 of either is set.
 * U and V are never filtered here: they keep their initial value 1<<18, which
 * has bit 16 clear, so they do not influence the clipping test.
 * The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_GRAY16_C \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        \
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        Y1>>=11;\
        Y2>>=11;\
        if ((Y1|Y2|U|V)&65536)\
        {\
            if (Y1>65535) Y1=65535; \
            else if (Y1<0)Y1=0;   \
            if (Y2>65535) Y2=65535; \
            else if (Y2<0)Y2=0;   \
        }
500
/*
 * Unclipped multi-tap vertical filter (YSCALE_YUV_2_PACKEDX_NOCLIP_C) followed
 * by the lookup of the per-pixel-pair r/g/b tables in the context: r is
 * selected by V, b by U and g by the sum of the U and V entries.
 * The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)  \
    r = (type *)c->table_rV[V];   \
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
    b = (type *)c->table_bU[U];   \
506
/*
 * Opens a loop over pixel pairs and blends two source lines:
 * luma = (buf0*yalpha1 + buf1*yalpha)>>19, chroma likewise with
 * uvbuf0/uvbuf1 and uvalpha1/uvalpha. V samples sit VOFW entries after the
 * U samples. No clipping is done; the loop is left unclosed.
 */
#define YSCALE_YUV_2_PACKED2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
46de8b73 514
b0880d5d
MN
/*
 * Two-line luma blend for 16-bit gray output: same weighting as
 * YSCALE_YUV_2_PACKED2_C but shifted by 11 instead of 19, and without any
 * chroma. The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_GRAY16_2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
520
/*
 * Two-line blend (YSCALE_YUV_2_PACKED2_C) followed by declaration and lookup
 * of the r/g/b tables for the current chroma pair.
 * The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];\
cf7d1c1a 527
/*
 * Opens a loop over pixel pairs that takes luma from the single line buf0
 * and chroma from the single line uvbuf1, each scaled down by >>7.
 * No clipping is done; the loop is left unclosed.
 */
#define YSCALE_YUV_2_PACKED1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+VOFW])>>7;\
46de8b73 535
b0880d5d
MN
/*
 * Single-line luma for 16-bit gray output: each buf0 sample is shifted left
 * by one. The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_GRAY16_1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]<<1;\
        int Y2= buf0[i2+1]<<1;\
541
/*
 * Single-line sampling (YSCALE_YUV_2_PACKED1_C) followed by declaration and
 * lookup of the r/g/b tables for the current chroma pair.
 * The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];\
cf7d1c1a 548
/*
 * Like YSCALE_YUV_2_PACKED1_C for luma, but chroma is the sum of the two
 * lines uvbuf0 and uvbuf1 shifted by >>8 (i.e. their average at the >>7
 * scale). The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_PACKED1B_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
46de8b73
MN
556
/*
 * Averaged-chroma sampling (YSCALE_YUV_2_PACKED1B_C) followed by declaration
 * and lookup of the r/g/b tables for the current chroma pair.
 * The loop opened by the macro is left unclosed.
 */
#define YSCALE_YUV_2_RGB1B_C(type) \
    YSCALE_YUV_2_PACKED1B_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];\
cf7d1c1a 563
e69bd294
MN
/*
 * 1 bit per pixel output from a blend of two luma lines.
 * For every group of 8 pixels the blended luma plus the row's entry of the
 * dither_8x8_220 matrix indexes the g table (taken at U = V = 128); the eight
 * table values are shifted into acc (acc = 2*acc + value) and the low byte of
 * acc is stored to dest. Trailing pixels beyond a multiple of 8 are not
 * processed. Unlike the other macros, the loop is closed here.
 */
#define YSCALE_YUV_2_MONOBLACK2_C \
    const uint8_t * const d128=dither_8x8_220[y&7];\
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
    for (i=0; i<dstW-7; i+=8){\
        int acc;\
        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
        ((uint8_t*)dest)[0]= acc;\
        dest++;\
    }\
580
581
/*
 * 1 bit per pixel output from a multi-tap vertical luma filter.
 * Two luma samples are filtered per iteration (rounded via 1<<18 / >>19 and
 * clipped to 0..255 when bit 8 is set), dithered with the row's entry of
 * dither_8x8_220 and looked up in the g table (taken at U = V = 128). The
 * table values are shifted into acc (acc = 2*acc + value); after every 8th
 * pixel the low byte of acc is stored to dest.
 *
 * acc is reset after each stored byte. Without the reset the signed
 * accumulator kept doubling across the whole line and overflowed (undefined
 * behaviour) after about 31 pixels; the stored bytes are unchanged because
 * only the 8 most recently shifted-in values reach the low byte.
 * Unlike most sibling macros, the loop is closed here.
 */
#define YSCALE_YUV_2_MONOBLACKX_C \
    const uint8_t * const d128=dither_8x8_220[y&7];\
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
    int acc=0;\
    for (i=0; i<dstW-1; i+=2){\
        int j;\
        int Y1=1<<18;\
        int Y2=1<<18;\
\
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i] * lumFilter[j];\
            Y2 += lumSrc[j][i+1] * lumFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        if ((Y1|Y2)&256)\
        {\
            if (Y1>255)   Y1=255;\
            else if (Y1<0)Y1=0;\
            if (Y2>255)   Y2=255;\
            else if (Y2<0)Y2=0;\
        }\
        acc+= acc + g[Y1+d128[(i+0)&7]];\
        acc+= acc + g[Y2+d128[(i+1)&7]];\
        if ((i&7)==6){\
            ((uint8_t*)dest)[0]= acc;\
            dest++;\
            acc=0;\
        }\
    }
612
613
/*
 * Output-format dispatcher shared by the packed-pixel writers.
 *
 * Switches on c->dstFormat and, per format, expands one of the caller
 * supplied loop-opening macros and then stores one pixel pair:
 *   func(type)      - used for the RGB/BGR formats; must open the loop and
 *                     provide Y1, Y2, i2 and the r/g/b lookup tables
 *   func2           - used for YUYV422 / UYVY422; must provide Y1, Y2, U, V
 *   func_g16        - used for GRAY16BE / GRAY16LE; must provide 16-bit Y1, Y2
 *   func_monoblack  - used for MONOBLACK; a complete, self-closing loop
 * The '}' following each store closes the loop opened by func/func2/func_g16.
 *
 * The 15/16 bpp cases add 2x2 dither offsets to the table index, the 8 and
 * 4 bpp cases use the 8x8 dither matrices, selected by the output row y.
 * For RGB24/BGR24 dest itself is advanced by 6 bytes per pixel pair; the
 * other cases index dest with i or i2.
 * Formats not listed are silently ignored (no default case).
 * Expects c, dest, i and y (plus whatever the func macros need) in scope.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB32:\
    case PIX_FMT_BGR32:\
    case PIX_FMT_RGB32_1:\
    case PIX_FMT_BGR32_1:\
        func(uint32_t)\
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
        {\
            func_monoblack\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }                \
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    case PIX_FMT_GRAY16BE:\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    case PIX_FMT_GRAY16LE:\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
        }                \
        break;\
    }\
cf7d1c1a
MN
749
750
/**
 * Plain C vertical filter producing one line of packed output.
 *
 * The whole body is the YSCALE_YUV_2_ANYRGB_C dispatcher instantiated with
 * the multi-tap ("X") loop macros, so the output layout is selected by
 * c->dstFormat.
 *
 * @param c             scaler context (destination format, lookup tables)
 * @param lumFilter     luma filter coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize luma source lines
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma filter coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize chroma source lines
 * @param chrFilterSize number of chroma taps
 * @param dest          output line
 * @param dstW          output width in pixels
 * @param y             output row number, used to select the dither row
 */
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                  uint8_t *dest, int dstW, int y)
{
    int i;
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOBLACKX_C)
}
758
759
7630f2e0
MN
760//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
761//Plain C versions
9bde778e 762#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
726a959a
MN
763#define COMPILE_C
764#endif
765
a2faa401 766#ifdef ARCH_POWERPC
9bde778e 767#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
a2faa401
RD
768#define COMPILE_ALTIVEC
769#endif //HAVE_ALTIVEC
770#endif //ARCH_POWERPC
771
3d6a30d9 772#if defined(ARCH_X86)
726a959a 773
9bde778e 774#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
726a959a
MN
775#define COMPILE_MMX
776#endif
777
9bde778e 778#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
726a959a
MN
779#define COMPILE_MMX2
780#endif
781
9bde778e 782#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
726a959a
MN
783#define COMPILE_3DNOW
784#endif
6e1c66bc 785#endif //ARCH_X86 || ARCH_X86_64
726a959a
MN
786
787#undef HAVE_MMX
788#undef HAVE_MMX2
789#undef HAVE_3DNOW
726a959a
MN
790
791#ifdef COMPILE_C
7630f2e0
MN
792#undef HAVE_MMX
793#undef HAVE_MMX2
794#undef HAVE_3DNOW
a2faa401 795#undef HAVE_ALTIVEC
7630f2e0
MN
796#define RENAME(a) a ## _C
797#include "swscale_template.c"
726a959a 798#endif
397c035e 799
a2faa401
RD
800#ifdef COMPILE_ALTIVEC
801#undef RENAME
802#define HAVE_ALTIVEC
803#define RENAME(a) a ## _altivec
804#include "swscale_template.c"
805#endif
a2faa401 806
3d6a30d9 807#if defined(ARCH_X86)
397c035e 808
7630f2e0
MN
809//X86 versions
810/*
811#undef RENAME
812#undef HAVE_MMX
813#undef HAVE_MMX2
814#undef HAVE_3DNOW
815#define ARCH_X86
816#define RENAME(a) a ## _X86
817#include "swscale_template.c"
1faf0867 818*/
7630f2e0 819//MMX versions
726a959a 820#ifdef COMPILE_MMX
7630f2e0
MN
821#undef RENAME
822#define HAVE_MMX
823#undef HAVE_MMX2
824#undef HAVE_3DNOW
7630f2e0
MN
825#define RENAME(a) a ## _MMX
826#include "swscale_template.c"
726a959a 827#endif
7630f2e0
MN
828
829//MMX2 versions
726a959a 830#ifdef COMPILE_MMX2
7630f2e0
MN
831#undef RENAME
832#define HAVE_MMX
833#define HAVE_MMX2
834#undef HAVE_3DNOW
7630f2e0
MN
835#define RENAME(a) a ## _MMX2
836#include "swscale_template.c"
726a959a 837#endif
7630f2e0
MN
838
839//3DNOW versions
726a959a 840#ifdef COMPILE_3DNOW
7630f2e0
MN
841#undef RENAME
842#define HAVE_MMX
843#undef HAVE_MMX2
844#define HAVE_3DNOW
7630f2e0
MN
845#define RENAME(a) a ## _3DNow
846#include "swscale_template.c"
726a959a 847#endif
7630f2e0 848
6e1c66bc 849#endif //ARCH_X86 || ARCH_X86_64
7630f2e0 850
77a416e8 851// minor note: the HAVE_xyz is messed up after that line so don't use it
d604bab9 852
a86c461c
MN
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1.0 the result is the cubic a + b*dist + c*dist^2 + d*dist^3
 * (evaluated in Horner form). For larger distances the coefficients are
 * advanced to the next unit segment - the constant term becomes 0.0 and
 * b, c, d are recomputed from the previous b, c, d - and dist is reduced by
 * 1.0, repeatedly, until dist <= 1.0.
 *
 * @param a    constant coefficient of the current segment
 * @param b    linear coefficient of the current segment
 * @param c    quadratic coefficient of the current segment
 * @param d    cubic coefficient of the current segment
 * @param dist distance at which to evaluate
 * @return the value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    for (;;) {
        double nextB, nextC, nextD;

        if (dist <= 1.0)
            return ((d*dist + c)*dist + b)*dist + a;

        /* all three are derived from the coefficients of the previous segment */
        nextB =  b + 2.0*c + 3.0*d;
        nextC =      c + 3.0*d;
        nextD = -b - 3.0*c - 6.0*d;

        a     = 0.0;
        b     = nextB;
        c     = nextC;
        d     = nextD;
        dist -= 1.0;
    }
}
6c7506de 863
bca11e75 864static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
221b804f
DB
865 int srcW, int dstW, int filterAlign, int one, int flags,
866 SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
28bf81c9 867{
221b804f
DB
868 int i;
869 int filterSize;
870 int filter2Size;
871 int minFilterSize;
872 double *filter=NULL;
873 double *filter2=NULL;
091d3bdc 874 int ret= -1;
3d6a30d9 875#if defined(ARCH_X86)
221b804f 876 if (flags & SWS_CPU_CAPS_MMX)
86bdf3fd 877 asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
726a959a 878#endif
31190492 879
221b804f
DB
880 // Note the +1 is for the MMXscaler which reads over the end
881 *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
882
883 if (FFABS(xInc - 0x10000) <10) // unscaled
884 {
885 int i;
886 filterSize= 1;
887 filter= av_malloc(dstW*sizeof(double)*filterSize);
888 for (i=0; i<dstW*filterSize; i++) filter[i]=0;
889
890 for (i=0; i<dstW; i++)
891 {
892 filter[i*filterSize]=1;
893 (*filterPos)[i]=i;
894 }
895
896 }
897 else if (flags&SWS_POINT) // lame looking point sampling mode
898 {
899 int i;
900 int xDstInSrc;
901 filterSize= 1;
902 filter= av_malloc(dstW*sizeof(double)*filterSize);
903
904 xDstInSrc= xInc/2 - 0x8000;
905 for (i=0; i<dstW; i++)
906 {
907 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
908
909 (*filterPos)[i]= xx;
910 filter[i]= 1.0;
911 xDstInSrc+= xInc;
912 }
913 }
914 else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
915 {
916 int i;
917 int xDstInSrc;
918 if (flags&SWS_BICUBIC) filterSize= 4;
919 else if (flags&SWS_X ) filterSize= 4;
920 else filterSize= 2; // SWS_BILINEAR / SWS_AREA
921 filter= av_malloc(dstW*sizeof(double)*filterSize);
922
923 xDstInSrc= xInc/2 - 0x8000;
924 for (i=0; i<dstW; i++)
925 {
926 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
927 int j;
928
929 (*filterPos)[i]= xx;
930 //Bilinear upscale / linear interpolate / Area averaging
931 for (j=0; j<filterSize; j++)
932 {
933 double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16);
934 double coeff= 1.0 - d;
935 if (coeff<0) coeff=0;
936 filter[i*filterSize + j]= coeff;
937 xx++;
938 }
939 xDstInSrc+= xInc;
940 }
941 }
942 else
943 {
944 double xDstInSrc;
945 double sizeFactor, filterSizeInSrc;
946 const double xInc1= (double)xInc / (double)(1<<16);
947
948 if (flags&SWS_BICUBIC) sizeFactor= 4.0;
949 else if (flags&SWS_X) sizeFactor= 8.0;
950 else if (flags&SWS_AREA) sizeFactor= 1.0; //downscale only, for upscale it is bilinear
951 else if (flags&SWS_GAUSS) sizeFactor= 8.0; // infinite ;)
952 else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
953 else if (flags&SWS_SINC) sizeFactor= 20.0; // infinite ;)
954 else if (flags&SWS_SPLINE) sizeFactor= 20.0; // infinite ;)
955 else if (flags&SWS_BILINEAR) sizeFactor= 2.0;
956 else {
957 sizeFactor= 0.0; //GCC warning killer
fcc402b1 958 assert(0);
221b804f
DB
959 }
960
961 if (xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale
962 else filterSizeInSrc= sizeFactor*srcW / (double)dstW;
963
964 filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
965 if (filterSize > srcW-2) filterSize=srcW-2;
966
967 filter= av_malloc(dstW*sizeof(double)*filterSize);
968
969 xDstInSrc= xInc1 / 2.0 - 0.5;
970 for (i=0; i<dstW; i++)
971 {
972 int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
973 int j;
974 (*filterPos)[i]= xx;
975 for (j=0; j<filterSize; j++)
976 {
977 double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
978 double coeff;
979 if (flags & SWS_BICUBIC)
980 {
981 double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
982 double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;
983
984 if (d<1.0)
985 coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
986 else if (d<2.0)
987 coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
988 else
989 coeff=0.0;
990 }
991/* else if (flags & SWS_X)
992 {
993 double p= param ? param*0.01 : 0.3;
994 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
995 coeff*= pow(2.0, - p*d*d);
996 }*/
997 else if (flags & SWS_X)
998 {
999 double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
1000
1001 if (d<1.0)
1002 coeff = cos(d*PI);
1003 else
1004 coeff=-1.0;
1005 if (coeff<0.0) coeff= -pow(-coeff, A);
1006 else coeff= pow( coeff, A);
1007 coeff= coeff*0.5 + 0.5;
1008 }
1009 else if (flags & SWS_AREA)
1010 {
1011 double srcPixelSize= 1.0/xInc1;
1012 if (d + srcPixelSize/2 < 0.5) coeff= 1.0;
1013 else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
1014 else coeff=0.0;
1015 }
1016 else if (flags & SWS_GAUSS)
1017 {
1018 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1019 coeff = pow(2.0, - p*d*d);
1020 }
1021 else if (flags & SWS_SINC)
1022 {
1023 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
1024 }
1025 else if (flags & SWS_LANCZOS)
1026 {
1027 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1028 coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
1029 if (d>p) coeff=0;
1030 }
1031 else if (flags & SWS_BILINEAR)
1032 {
1033 coeff= 1.0 - d;
1034 if (coeff<0) coeff=0;
1035 }
1036 else if (flags & SWS_SPLINE)
1037 {
1038 double p=-2.196152422706632;
1039 coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
1040 }
1041 else {
1042 coeff= 0.0; //GCC warning killer
fcc402b1 1043 assert(0);
221b804f
DB
1044 }
1045
1046 filter[i*filterSize + j]= coeff;
1047 xx++;
1048 }
1049 xDstInSrc+= xInc1;
1050 }
1051 }
1052
1053 /* apply src & dst Filter to filter -> filter2
1054 av_free(filter);
1055 */
fcc402b1 1056 assert(filterSize>0);
221b804f
DB
1057 filter2Size= filterSize;
1058 if (srcFilter) filter2Size+= srcFilter->length - 1;
1059 if (dstFilter) filter2Size+= dstFilter->length - 1;
fcc402b1 1060 assert(filter2Size>0);
221b804f
DB
1061 filter2= av_malloc(filter2Size*dstW*sizeof(double));
1062
1063 for (i=0; i<dstW; i++)
1064 {
1065 int j;
1066 SwsVector scaleFilter;
1067 SwsVector *outVec;
1068
1069 scaleFilter.coeff= filter + i*filterSize;
1070 scaleFilter.length= filterSize;
1071
1072 if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
1073 else outVec= &scaleFilter;
1074
fcc402b1 1075 assert(outVec->length == filter2Size);
221b804f
DB
1076 //FIXME dstFilter
1077
1078 for (j=0; j<outVec->length; j++)
1079 {
1080 filter2[i*filter2Size + j]= outVec->coeff[j];
1081 }
1082
1083 (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
1084
1085 if (outVec != &scaleFilter) sws_freeVec(outVec);
1086 }
47b7382d 1087 av_freep(&filter);
221b804f
DB
1088
1089 /* try to reduce the filter-size (step1 find size and shift left) */
86bdf3fd 1090 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
221b804f
DB
1091 minFilterSize= 0;
1092 for (i=dstW-1; i>=0; i--)
1093 {
1094 int min= filter2Size;
1095 int j;
1096 double cutOff=0.0;
1097
1098 /* get rid off near zero elements on the left by shifting left */
1099 for (j=0; j<filter2Size; j++)
1100 {
1101 int k;
1102 cutOff += FFABS(filter2[i*filter2Size]);
1103
1104 if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1105
86bdf3fd 1106 /* preserve monotonicity because the core can't handle the filter otherwise */
221b804f
DB
1107 if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1108
1109 // Move filter coeffs left
1110 for (k=1; k<filter2Size; k++)
1111 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1112 filter2[i*filter2Size + k - 1]= 0.0;
1113 (*filterPos)[i]++;
1114 }
1115
1116 cutOff=0.0;
1117 /* count near zeros on the right */
1118 for (j=filter2Size-1; j>0; j--)
1119 {
1120 cutOff += FFABS(filter2[i*filter2Size + j]);
1121
1122 if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1123 min--;
1124 }
1125
1126 if (min>minFilterSize) minFilterSize= min;
1127 }
1128
1129 if (flags & SWS_CPU_CAPS_ALTIVEC) {
1130 // we can handle the special case 4,
1131 // so we don't want to go to the full 8
1132 if (minFilterSize < 5)
8c266f0c
RD
1133 filterAlign = 4;
1134
221b804f
DB
1135 // we really don't want to waste our time
1136 // doing useless computation, so fall-back on
1137 // the scalar C code for very small filter.
1138 // vectorizing is worth it only if you have
1139 // decent-sized vector.
1140 if (minFilterSize < 3)
8c266f0c 1141 filterAlign = 1;
221b804f
DB
1142 }
1143
1144 if (flags & SWS_CPU_CAPS_MMX) {
1145 // special case for unscaled vertical filtering
1146 if (minFilterSize == 1 && filterAlign == 2)
1147 filterAlign= 1;
1148 }
1149
fcc402b1 1150 assert(minFilterSize > 0);
221b804f 1151 filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
fcc402b1 1152 assert(filterSize > 0);
221b804f 1153 filter= av_malloc(filterSize*dstW*sizeof(double));
1625216e 1154 if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
091d3bdc 1155 goto error;
221b804f
DB
1156 *outFilterSize= filterSize;
1157
1158 if (flags&SWS_PRINT_INFO)
1159 av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
1160 /* try to reduce the filter-size (step2 reduce it) */
1161 for (i=0; i<dstW; i++)
1162 {
1163 int j;
1164
1165 for (j=0; j<filterSize; j++)
1166 {
1167 if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
1168 else filter[i*filterSize + j]= filter2[i*filter2Size + j];
8c266f0c 1169 }
221b804f 1170 }
221b804f
DB
1171
1172
1173 //FIXME try to align filterpos if possible
8c266f0c 1174
221b804f
DB
1175 //fix borders
1176 for (i=0; i<dstW; i++)
1177 {
1178 int j;
1179 if ((*filterPos)[i] < 0)
1180 {
1181 // Move filter coeffs left to compensate for filterPos
1182 for (j=1; j<filterSize; j++)
1183 {
1184 int left= FFMAX(j + (*filterPos)[i], 0);
1185 filter[i*filterSize + left] += filter[i*filterSize + j];
1186 filter[i*filterSize + j]=0;
1187 }
1188 (*filterPos)[i]= 0;
bca11e75
MN
1189 }
1190
221b804f
DB
1191 if ((*filterPos)[i] + filterSize > srcW)
1192 {
1193 int shift= (*filterPos)[i] + filterSize - srcW;
1194 // Move filter coeffs right to compensate for filterPos
1195 for (j=filterSize-2; j>=0; j--)
1196 {
1197 int right= FFMIN(j + shift, filterSize-1);
1198 filter[i*filterSize +right] += filter[i*filterSize +j];
1199 filter[i*filterSize +j]=0;
1200 }
1201 (*filterPos)[i]= srcW - filterSize;
1202 }
1203 }
1204
1205 // Note the +1 is for the MMXscaler which reads over the end
1206 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
1207 *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
1208
1209 /* Normalize & Store in outFilter */
1210 for (i=0; i<dstW; i++)
1211 {
1212 int j;
1213 double error=0;
1214 double sum=0;
1215 double scale= one;
1216
1217 for (j=0; j<filterSize; j++)
1218 {
1219 sum+= filter[i*filterSize + j];
1220 }
1221 scale/= sum;
1222 for (j=0; j<*outFilterSize; j++)
1223 {
1224 double v= filter[i*filterSize + j]*scale + error;
1225 int intV= floor(v + 0.5);
1226 (*outFilter)[i*(*outFilterSize) + j]= intV;
1227 error = v - intV;
1228 }
1229 }
1230
1231 (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
1232 for (i=0; i<*outFilterSize; i++)
1233 {
1234 int j= dstW*(*outFilterSize);
1235 (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
1236 }
1237
091d3bdc
MN
1238 ret=0;
1239error:
221b804f 1240 av_free(filter);
091d3bdc
MN
1241 av_free(filter2);
1242 return ret;
7630f2e0 1243}
31190492 1244
17c613ef 1245#ifdef COMPILE_MMX2
/**
 * Generate the run-time MMX2 horizontal scaling code and its tables.
 *
 * Two machine-code fragments (A and B) are assembled in place inside this
 * function and skipped over at run time ("jmp 9f"); only their address,
 * length and the position of two patchable bytes are extracted. For every
 * group of four output pixels one fragment is copied into funnyCode and the
 * two patchable bytes of the copy are rewritten.
 *
 * @param dstW      output width; dstW/numSplits output pixels are processed
 * @param xInc      source step per output pixel, used as 16.16 fixed point
 *                  (integer part is xpos>>16, fraction is xpos&0xFFFF)
 * @param funnyCode buffer receiving the generated code; its required size is
 *                  not checked here (NOTE(review): caller must size it)
 * @param filter    receives one coefficient per output pixel:
 *                  ((fraction ^ 0xFFFF) >> 9), i.e. a value in 0..127
 * @param filterPos receives, at index i/2 for every fourth pixel i, the
 *                  (possibly shift-adjusted) integer source position, plus
 *                  one trailing entry after the loop
 * @param numSplits divisor applied to dstW to get the number of pixels handled
 */
b7dc6f66 1246static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
28bf81c9 1247{
221b804f
DB
1248 uint8_t *fragmentA;
1249 long imm8OfPShufW1A;
1250 long imm8OfPShufW2A;
1251 long fragmentLengthA;
1252 uint8_t *fragmentB;
1253 long imm8OfPShufW1B;
1254 long imm8OfPShufW2B;
1255 long fragmentLengthB;
1256 int fragmentPos;
1257
1258 int xpos, i;
1259
1260 // create an optimized horizontal scaling routine
1261
1262 //code fragment
1263
/* Fragment A: loads source bytes twice (offset 0 into mm0, offset 1 into mm1).
 * After label 9 the asm computes:
 *   %0 = address of label 0 (fragment start)
 *   %1 = (label 1 - 1) - label 0, %2 = (label 2 - 1) - label 0: offset of the
 *        last byte before each label, which per the variable names is the
 *        imm8 operand of the preceding pshufw
 *   %3 = label 9 - label 0 (fragment length in bytes) */
1264 asm volatile(
1265 "jmp 9f \n\t"
1266 // Begin
1267 "0: \n\t"
1268 "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
1269 "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
1270 "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
1271 "punpcklbw %%mm7, %%mm1 \n\t"
1272 "punpcklbw %%mm7, %%mm0 \n\t"
1273 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
1274 "1: \n\t"
1275 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
1276 "2: \n\t"
1277 "psubw %%mm1, %%mm0 \n\t"
1278 "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
1279 "pmullw %%mm3, %%mm0 \n\t"
1280 "psllw $7, %%mm1 \n\t"
1281 "paddw %%mm1, %%mm0 \n\t"
1282
1283 "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1284
1285 "add $8, %%"REG_a" \n\t"
1286 // End
1287 "9: \n\t"
1288// "int $3 \n\t"
86593486
AS
1289 "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
1290 "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
1291 "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
221b804f
DB
1292 "dec %1 \n\t"
1293 "dec %2 \n\t"
1294 "sub %0, %1 \n\t"
1295 "sub %0, %2 \n\t"
86593486 1296 "lea " LOCAL_MANGLE(9b) ", %3 \n\t"
221b804f
DB
1297 "sub %0, %3 \n\t"
1298
1299
1300 :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1301 "=r" (fragmentLengthA)
1302 );
1303
/* Fragment B: same as A except that only one movd load is done and mm1 is
 * produced from mm0 by the first pshufw. Outputs have the same meaning as
 * for fragment A. */
1304 asm volatile(
1305 "jmp 9f \n\t"
1306 // Begin
1307 "0: \n\t"
1308 "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
1309 "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
1310 "punpcklbw %%mm7, %%mm0 \n\t"
1311 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
1312 "1: \n\t"
1313 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
1314 "2: \n\t"
1315 "psubw %%mm1, %%mm0 \n\t"
1316 "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
1317 "pmullw %%mm3, %%mm0 \n\t"
1318 "psllw $7, %%mm1 \n\t"
1319 "paddw %%mm1, %%mm0 \n\t"
1320
1321 "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1322
1323 "add $8, %%"REG_a" \n\t"
1324 // End
1325 "9: \n\t"
1326// "int $3 \n\t"
86593486
AS
1327 "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
1328 "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
1329 "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
221b804f
DB
1330 "dec %1 \n\t"
1331 "dec %2 \n\t"
1332 "sub %0, %1 \n\t"
1333 "sub %0, %2 \n\t"
86593486 1334 "lea " LOCAL_MANGLE(9b) ", %3 \n\t"
221b804f
DB
1335 "sub %0, %3 \n\t"
1336
1337
1338 :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1339 "=r" (fragmentLengthB)
1340 );
1341
1342 xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1343 fragmentPos=0;
1344
1345 for (i=0; i<dstW/numSplits; i++)
1346 {
1347 int xx=xpos>>16;
1348
/* work is done once per group of four output pixels */
1349 if ((i&3) == 0)
1350 {
/* a..d: integer source offsets of the four pixels relative to xx */
1351 int a=0;
1352 int b=((xpos+xInc)>>16) - xx;
1353 int c=((xpos+xInc*2)>>16) - xx;
1354 int d=((xpos+xInc*3)>>16) - xx;
1355
/* inverted 16-bit fraction reduced to 7 bits for each of the four pixels */
1356 filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
1357 filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
1358 filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1359 filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1360 filterPos[i/2]= xx;
1361
/* d+1 < 4: fragment B (single load) is used; presumably because offsets
 * 0..d+1 then all fit in the 4 bytes of one movd -- TODO confirm */
1362 if (d+1<4)
1363 {
1364 int maxShift= 3-(d+1);
1365 int shift=0;
1366
1367 memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1368
/* patch the two bytes located via imm8OfPShufW1B/2B: four 2-bit fields,
 * first with offsets+1, second with the offsets themselves */
1369 funnyCode[fragmentPos + imm8OfPShufW1B]=
1370 (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1371 funnyCode[fragmentPos + imm8OfPShufW2B]=
1372 a | (b<<2) | (c<<4) | (d<<6);
1373
1374 if (i+3>=dstW) shift=maxShift; //avoid overread
1375 else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1376
/* 0x55*shift adds 'shift' to each 2-bit field; the source position is
 * moved back by the same amount to compensate */
1377 if (shift && i>=shift)
1378 {
1379 funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1380 funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1381 filterPos[i/2]-=shift;
1382 }
1383
1384 fragmentPos+= fragmentLengthB;
1385 }
1386 else
1387 {
1388 int maxShift= 3-d;
1389 int shift=0;
1390
1391 memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1392
/* fragment A: both patched bytes get the same offset fields */
1393 funnyCode[fragmentPos + imm8OfPShufW1A]=
1394 funnyCode[fragmentPos + imm8OfPShufW2A]=
1395 a | (b<<2) | (c<<4) | (d<<6);
1396
1397 if (i+4>=dstW) shift=maxShift; //avoid overread
1398 else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1399
1400 if (shift && i>=shift)
1401 {
1402 funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1403 funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1404 filterPos[i/2]-=shift;
1405 }
1406
1407 fragmentPos+= fragmentLengthA;
1408 }
1409
/* terminate the code generated so far; the next fragment copy starts at
 * this same position and overwrites the RET */
1410 funnyCode[fragmentPos]= RET;
1411 }
1412 xpos+=xInc;
1413 }
1414 filterPos[i/2]= xpos>>16; // needed to jump to the next part
28bf81c9 1415}
17c613ef 1416#endif /* COMPILE_MMX2 */
28bf81c9 1417
9b2283cc 1418static void globalInit(void){
31190492
A
1419 // generating tables:
1420 int i;
221b804f
DB
1421 for (i=0; i<768; i++){
1422 int c= av_clip_uint8(i-256);
1423 clip_table[i]=c;
b18ea156 1424 }
516b1f82 1425}
c1b0bfb4 1426
516b1f82 1427static SwsFunc getSwsFunc(int flags){
6a4970ab 1428
9bde778e 1429#if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)
3d6a30d9 1430#if defined(ARCH_X86)
c14731d8 1431 // ordered per speed fastest first
221b804f
DB
1432 if (flags & SWS_CPU_CAPS_MMX2)
1433 return swScale_MMX2;
1434 else if (flags & SWS_CPU_CAPS_3DNOW)
1435 return swScale_3DNow;
1436 else if (flags & SWS_CPU_CAPS_MMX)
1437 return swScale_MMX;
1438 else
1439 return swScale_C;
28bf81c9
MN
1440
1441#else
a2faa401 1442#ifdef ARCH_POWERPC
221b804f
DB
1443 if (flags & SWS_CPU_CAPS_ALTIVEC)
1444 return swScale_altivec;
1445 else
1446 return swScale_C;
a2faa401 1447#endif
221b804f 1448 return swScale_C;
3d6a30d9 1449#endif /* defined(ARCH_X86) */
28bf81c9
MN
1450#else //RUNTIME_CPUDETECT
1451#ifdef HAVE_MMX2
221b804f 1452 return swScale_MMX2;
28bf81c9 1453#elif defined (HAVE_3DNOW)
221b804f 1454 return swScale_3DNow;
28bf81c9 1455#elif defined (HAVE_MMX)
221b804f 1456 return swScale_MMX;
a2faa401 1457#elif defined (HAVE_ALTIVEC)
221b804f 1458 return swScale_altivec;
28bf81c9 1459#else
221b804f 1460 return swScale_C;
28bf81c9
MN
1461#endif
1462#endif //!RUNTIME_CPUDETECT
31190492 1463}
7630f2e0 1464
d4e24275 1465static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1466 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1467 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1468 /* Copy Y plane */
1469 if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1470 memcpy(dst, src[0], srcSliceH*dstStride[0]);
1471 else
1472 {
1473 int i;
1474 uint8_t *srcPtr= src[0];
1475 uint8_t *dstPtr= dst;
1476 for (i=0; i<srcSliceH; i++)
1477 {
1478 memcpy(dstPtr, srcPtr, c->srcW);
1479 srcPtr+= srcStride[0];
1480 dstPtr+= dstStride[0];
1481 }
1482 }
1483 dst = dstParam[1] + dstStride[1]*srcSliceY/2;
1484 if (c->dstFormat == PIX_FMT_NV12)
30c48a0a 1485 interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]);
221b804f 1486 else
30c48a0a 1487 interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]);
221b804f
DB
1488
1489 return srcSliceH;
0d9f3d85
A
1490}
1491
d4e24275 1492static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1493 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1494 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
0d9f3d85 1495
30c48a0a 1496 yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
fccb9b2b 1497
221b804f 1498 return srcSliceH;
0d9f3d85
A
1499}
1500
caeaabe7 1501static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1502 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1503 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
caeaabe7 1504
30c48a0a 1505 yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
caeaabe7 1506
221b804f 1507 return srcSliceH;
caeaabe7
AB
1508}
1509
a6100f39
BC
1510static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1511 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1512 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1513
1514 yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
1515
1516 return srcSliceH;
1517}
1518
1519static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1520 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1521 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1522
1523 yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
1524
1525 return srcSliceH;
1526}
1527
9990e426 1528/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
d4e24275 1529static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1530 int srcSliceH, uint8_t* dst[], int dstStride[]){
1531 const int srcFormat= c->srcFormat;
1532 const int dstFormat= c->dstFormat;
1533 const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
1534 const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
1535 const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
1536 const int dstId= fmt_depth(dstFormat) >> 2;
1537 void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1538
1539 /* BGR -> BGR */
1540 if ( (isBGR(srcFormat) && isBGR(dstFormat))
1541 || (isRGB(srcFormat) && isRGB(dstFormat))){
1542 switch(srcId | (dstId<<4)){
1543 case 0x34: conv= rgb16to15; break;
1544 case 0x36: conv= rgb24to15; break;
1545 case 0x38: conv= rgb32to15; break;
1546 case 0x43: conv= rgb15to16; break;
1547 case 0x46: conv= rgb24to16; break;
1548 case 0x48: conv= rgb32to16; break;
1549 case 0x63: conv= rgb15to24; break;
1550 case 0x64: conv= rgb16to24; break;
1551 case 0x68: conv= rgb32to24; break;
1552 case 0x83: conv= rgb15to32; break;
1553 case 0x84: conv= rgb16to32; break;
1554 case 0x86: conv= rgb24to32; break;
3f0bc115 1555 default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
221b804f
DB
1556 sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1557 }
1558 }else if ( (isBGR(srcFormat) && isRGB(dstFormat))
1559 || (isRGB(srcFormat) && isBGR(dstFormat))){
1560 switch(srcId | (dstId<<4)){
1561 case 0x33: conv= rgb15tobgr15; break;
1562 case 0x34: conv= rgb16tobgr15; break;
1563 case 0x36: conv= rgb24tobgr15; break;
1564 case 0x38: conv= rgb32tobgr15; break;
1565 case 0x43: conv= rgb15tobgr16; break;
1566 case 0x44: conv= rgb16tobgr16; break;
1567 case 0x46: conv= rgb24tobgr16; break;
1568 case 0x48: conv= rgb32tobgr16; break;
1569 case 0x63: conv= rgb15tobgr24; break;
1570 case 0x64: conv= rgb16tobgr24; break;
1571 case 0x66: conv= rgb24tobgr24; break;
1572 case 0x68: conv= rgb32tobgr24; break;
1573 case 0x83: conv= rgb15tobgr32; break;
1574 case 0x84: conv= rgb16tobgr32; break;
1575 case 0x86: conv= rgb24tobgr32; break;
1576 case 0x88: conv= rgb32tobgr32; break;
3f0bc115 1577 default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
221b804f
DB
1578 sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1579 }
1580 }else{
3f0bc115 1581 av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
221b804f
DB
1582 sws_format_name(srcFormat), sws_format_name(dstFormat));
1583 }
1584
068b0f4f
BF
1585 if(conv)
1586 {
9990e426
MN
1587 uint8_t *srcPtr= src[0];
1588 if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
1589 srcPtr += ALT32_CORR;
1590
5efaf000 1591 if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
9990e426 1592 conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
c4ca31d0 1593 else
221b804f 1594 {
c4ca31d0 1595 int i;
c4ca31d0
BF
1596 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1597
1598 for (i=0; i<srcSliceH; i++)
1599 {
1600 conv(srcPtr, dstPtr, c->srcW*srcBpp);
1601 srcPtr+= srcStride[0];
1602 dstPtr+= dstStride[0];
1603 }
221b804f
DB
1604 }
1605 }
1606 return srcSliceH;
0d9f3d85
A
1607}
1608
d4e24275 1609static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1610 int srcSliceH, uint8_t* dst[], int dstStride[]){
1611
1612 rgb24toyv12(
1613 src[0],
1614 dst[0]+ srcSliceY *dstStride[0],
1615 dst[1]+(srcSliceY>>1)*dstStride[1],
1616 dst[2]+(srcSliceY>>1)*dstStride[2],
1617 c->srcW, srcSliceH,
1618 dstStride[0], dstStride[1], srcStride[0]);
1619 return srcSliceH;
ec22603f
MN
1620}
1621
d4e24275 1622static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1623 int srcSliceH, uint8_t* dst[], int dstStride[]){
1624 int i;
1625
1626 /* copy Y */
1627 if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
1628 memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
1629 else{
1630 uint8_t *srcPtr= src[0];
1631 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1632
1633 for (i=0; i<srcSliceH; i++)
1634 {
1635 memcpy(dstPtr, srcPtr, c->srcW);
1636 srcPtr+= srcStride[0];
1637 dstPtr+= dstStride[0];
1638 }
1639 }
1640
1641 if (c->dstFormat==PIX_FMT_YUV420P){
1642 planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
1643 planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
1644 }else{
1645 planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
1646 planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
1647 }
1648 return srcSliceH;
b241cbf2
MN
1649}
1650
b6654a54 1651/* unscaled copy like stuff (assumes nearly identical formats) */
2d35ae56 1652static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
bc5a0444
LB
1653 int srcSliceH, uint8_t* dst[], int dstStride[])
1654{
1655 if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1656 memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1657 else
1658 {
1659 int i;
1660 uint8_t *srcPtr= src[0];
1661 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1662 int length=0;
221b804f 1663
bc5a0444
LB
1664 /* universal length finder */
1665 while(length+c->srcW <= FFABS(dstStride[0])
1666 && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
fcc402b1 1667 assert(length!=0);
2d35ae56 1668
bc5a0444 1669 for (i=0; i<srcSliceH; i++)
221b804f 1670 {
bc5a0444
LB
1671 memcpy(dstPtr, srcPtr, length);
1672 srcPtr+= srcStride[0];
1673 dstPtr+= dstStride[0];
221b804f 1674 }
bc5a0444 1675 }
2d35ae56
LB
1676 return srcSliceH;
1677}
bc5a0444 1678
2d35ae56
LB
1679static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1680 int srcSliceH, uint8_t* dst[], int dstStride[])
1681{
bc5a0444
LB
1682 int plane;
1683 for (plane=0; plane<3; plane++)
1684 {
1685 int length= plane==0 ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
1686 int y= plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1687 int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
2d35ae56 1688
bc5a0444 1689 if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
221b804f 1690 {
bc5a0444
LB
1691 if (!isGray(c->dstFormat))
1692 memset(dst[plane], 128, dstStride[plane]*height);
1693 }
1694 else
1695 {
1696 if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1697 memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
221b804f
DB
1698 else
1699 {
bc5a0444
LB
1700 int i;
1701 uint8_t *srcPtr= src[plane];
1702 uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1703 for (i=0; i<height; i++)
221b804f 1704 {
bc5a0444
LB
1705 memcpy(dstPtr, srcPtr, length);
1706 srcPtr+= srcStride[plane];
1707 dstPtr+= dstStride[plane];
221b804f
DB
1708 }
1709 }
1710 }
bc5a0444 1711 }
221b804f 1712 return srcSliceH;
37079906 1713}
28bf81c9 1714
4884b9e5 1715static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1716 int srcSliceH, uint8_t* dst[], int dstStride[]){
1717
1718 int length= c->srcW;
1719 int y= srcSliceY;
1720 int height= srcSliceH;
1721 int i, j;
1722 uint8_t *srcPtr= src[0];
1723 uint8_t *dstPtr= dst[0] + dstStride[0]*y;
1724
1725 if (!isGray(c->dstFormat)){
1726 int height= -((-srcSliceH)>>c->chrDstVSubSample);
1727 memset(dst[1], 128, dstStride[1]*height);
1728 memset(dst[2], 128, dstStride[2]*height);
1729 }
1730 if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
1731 for (i=0; i<height; i++)
1732 {
1733 for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
1734 srcPtr+= srcStride[0];
1735 dstPtr+= dstStride[0];
1736 }
1737 return srcSliceH;
4884b9e5
KS
1738}
1739
1740static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1741 int srcSliceH, uint8_t* dst[], int dstStride[]){
1742
1743 int length= c->srcW;
1744 int y= srcSliceY;
1745 int height= srcSliceH;
1746 int i, j;
1747 uint8_t *srcPtr= src[0];
1748 uint8_t *dstPtr= dst[0] + dstStride[0]*y;
1749 for (i=0; i<height; i++)
1750 {
1751 for (j=0; j<length; j++)
1752 {
1753 dstPtr[j<<1] = srcPtr[j];
1754 dstPtr[(j<<1)+1] = srcPtr[j];
1755 }
1756 srcPtr+= srcStride[0];
1757 dstPtr+= dstStride[0];
1758 }
1759 return srcSliceH;
4884b9e5
KS
1760}
1761
1762static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
1763 int srcSliceH, uint8_t* dst[], int dstStride[]){
1764
1765 int length= c->srcW;
1766 int y= srcSliceY;
1767 int height= srcSliceH;
1768 int i, j;
73d046e2 1769 uint16_t *srcPtr= (uint16_t*)src[0];
b8b015f4 1770 uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y/2);
221b804f
DB
1771 for (i=0; i<height; i++)
1772 {
1773 for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
1774 srcPtr+= srcStride[0]/2;
1775 dstPtr+= dstStride[0]/2;
1776 }
1777 return srcSliceH;
4884b9e5
KS
1778}
1779
1780
c7a810cc 1781static void getSubSampleFactors(int *h, int *v, int format){
221b804f
DB
1782 switch(format){
1783 case PIX_FMT_UYVY422:
1784 case PIX_FMT_YUYV422:
1785 *h=1;
1786 *v=0;
1787 break;
1788 case PIX_FMT_YUV420P:
79973335 1789 case PIX_FMT_YUVA420P:
221b804f
DB
1790 case PIX_FMT_GRAY16BE:
1791 case PIX_FMT_GRAY16LE:
1792 case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
1793 case PIX_FMT_NV12:
1794 case PIX_FMT_NV21:
1795 *h=1;
1796 *v=1;
1797 break;
9ba7fe6d
1798 case PIX_FMT_YUV440P:
1799 *h=0;
1800 *v=1;
1801 break;
221b804f
DB
1802 case PIX_FMT_YUV410P:
1803 *h=2;
1804 *v=2;
1805 break;
1806 case PIX_FMT_YUV444P:
1807 *h=0;
1808 *v=0;
1809 break;
1810 case PIX_FMT_YUV422P:
1811 *h=1;
1812 *v=0;
1813 break;
1814 case PIX_FMT_YUV411P:
1815 *h=2;
1816 *v=0;
1817 break;
1818 default:
1819 *h=0;
1820 *v=0;
1821 break;
1822 }
c7a810cc
MN
1823}
1824
/**
 * Convert a 16.16 fixed-point value to a rounded, saturated 16-bit value.
 *
 * The result is the two's complement bit pattern of the signed value,
 * returned as uint16_t: values below -0x7FFF give 0x8000, values above
 * 0x7FFF give 0x7FFF.
 *
 * @param f 16.16 fixed-point input
 * @return (f + 0.5) >> 16, saturated as described above
 */
static uint16_t roundToInt16(int64_t f){
    /* Keep the full 64-bit width until after the range checks; narrowing to
     * int first would wrap large magnitudes back into range. */
    int64_t r= (f + (1<<15))>>16;
    if      (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return r;
}
1831
1832/**
5427e242 1833 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
86bdf3fd 1834 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
5427e242 1835 * @return -1 if not supported
0481412a 1836 */
5427e242 1837int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
221b804f
DB
1838 int64_t crv = inv_table[0];
1839 int64_t cbu = inv_table[1];
1840 int64_t cgu = -inv_table[2];
1841 int64_t cgv = -inv_table[3];
1842 int64_t cy = 1<<16;
1843 int64_t oy = 0;
1844
221b804f
DB
1845 memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1846 memcpy(c->dstColorspaceTable, table, sizeof(int)*4);
1847
1848 c->brightness= brightness;
1849 c->contrast = contrast;
1850 c->saturation= saturation;
1851 c->srcRange = srcRange;
1852 c->dstRange = dstRange;
6bc0c792 1853 if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0;
221b804f
DB
1854
1855 c->uOffset= 0x0400040004000400LL;
1856 c->vOffset= 0x0400040004000400LL;
1857
1858 if (!srcRange){
1859 cy= (cy*255) / 219;
1860 oy= 16<<16;
1861 }else{
1862 crv= (crv*224) / 255;
1863 cbu= (cbu*224) / 255;
1864 cgu= (cgu*224) / 255;
1865 cgv= (cgv*224) / 255;
1866 }
0481412a 1867
221b804f
DB
1868 cy = (cy *contrast )>>16;
1869 crv= (crv*contrast * saturation)>>32;
1870 cbu= (cbu*contrast * saturation)>>32;
1871 cgu= (cgu*contrast * saturation)>>32;
1872 cgv= (cgv*contrast * saturation)>>32;
0481412a 1873
221b804f 1874 oy -= 256*brightness;
0481412a 1875
221b804f
DB
1876 c->yCoeff= roundToInt16(cy *8192) * 0x0001000100010001ULL;
1877 c->vrCoeff= roundToInt16(crv*8192) * 0x0001000100010001ULL;
1878 c->ubCoeff= roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1879 c->vgCoeff= roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1880 c->ugCoeff= roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1881 c->yOffset= roundToInt16(oy * 8) * 0x0001000100010001ULL;
5427e242 1882
221b804f
DB
1883 yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1884 //FIXME factorize
a31de956 1885
6634d0ef 1886#ifdef COMPILE_ALTIVEC
221b804f
DB
1887 if (c->flags & SWS_CPU_CAPS_ALTIVEC)
1888 yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
6a4970ab 1889#endif
221b804f 1890 return 0;
5427e242
MN
1891}
1892
1893/**
1894 * @return -1 if not supported
1895 */
1896int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
221b804f 1897 if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
5427e242 1898
221b804f
DB
1899 *inv_table = c->srcColorspaceTable;
1900 *table = c->dstColorspaceTable;
1901 *srcRange = c->srcRange;
1902 *dstRange = c->dstRange;
1903 *brightness= c->brightness;
1904 *contrast = c->contrast;
1905 *saturation= c->saturation;
6a4970ab 1906
221b804f 1907 return 0;
0481412a
MN
1908}
1909
44cdb423
LA
1910static int handle_jpeg(int *format)
1911{
221b804f
DB
1912 switch (*format) {
1913 case PIX_FMT_YUVJ420P:
1914 *format = PIX_FMT_YUV420P;
1915 return 1;
1916 case PIX_FMT_YUVJ422P:
1917 *format = PIX_FMT_YUV422P;
1918 return 1;
1919 case PIX_FMT_YUVJ444P:
1920 *format = PIX_FMT_YUV444P;
1921 return 1;
9ba7fe6d
1922 case PIX_FMT_YUVJ440P:
1923 *format = PIX_FMT_YUV440P;
1924 return 1;
221b804f
DB
1925 default:
1926 return 0;
1927 }
44cdb423
LA
1928}
1929
e9e12f0e 1930SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
221b804f
DB
1931 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
1932
1933 SwsContext *c;
1934 int i;
1935 int usesVFilter, usesHFilter;
1936 int unscaled, needsDither;
1937 int srcRange, dstRange;
1938 SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
3d6a30d9 1939#if defined(ARCH_X86)
221b804f
DB
1940 if (flags & SWS_CPU_CAPS_MMX)
1941 asm volatile("emms\n\t"::: "memory");
5cebb24b 1942#endif
516b1f82 1943
9bde778e 1944#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
d3f3eea9 1945 flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
516b1f82 1946#ifdef HAVE_MMX2
221b804f 1947 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
516b1f82 1948#elif defined (HAVE_3DNOW)
221b804f 1949 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
516b1f82 1950#elif defined (HAVE_MMX)
221b804f 1951 flags |= SWS_CPU_CAPS_MMX;
a2faa401 1952#elif defined (HAVE_ALTIVEC)
221b804f 1953 flags |= SWS_CPU_CAPS_ALTIVEC;
721d5e3b 1954#elif defined (ARCH_BFIN)
d3f3eea9 1955 flags |= SWS_CPU_CAPS_BFIN;
516b1f82 1956#endif
69796008 1957#endif /* RUNTIME_CPUDETECT */
221b804f 1958 if (clip_table[512] != 255) globalInit();
1b0a4572 1959 if (!rgb15to16) sws_rgb2rgb_init(flags);
221b804f
DB
1960
1961 unscaled = (srcW == dstW && srcH == dstH);
1962 needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
1963 && (fmt_depth(dstFormat))<24
1964 && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
1965
1966 srcRange = handle_jpeg(&srcFormat);
1967 dstRange = handle_jpeg(&dstFormat);
1968
1969 if (!isSupportedIn(srcFormat))
1970 {
0d6fd5ec 1971 av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
221b804f
DB
1972 return NULL;
1973 }
1974 if (!isSupportedOut(dstFormat))
1975 {
0d6fd5ec 1976 av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
221b804f
DB
1977 return NULL;
1978 }
1979
010c00bc
MN
1980 i= flags & ( SWS_POINT
1981 |SWS_AREA
6afc7c19 1982 |SWS_BILINEAR
010c00bc
MN
1983 |SWS_FAST_BILINEAR
1984 |SWS_BICUBIC
1985 |SWS_X
1986 |SWS_GAUSS
1987 |SWS_LANCZOS
1988 |SWS_SINC
1989 |SWS_SPLINE
1990 |SWS_BICUBLIN);
1991 if(!i || (i & (i-1)))
1992 {
1993 av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be choosen\n");
1994 return NULL;
1995 }
1996
1997
221b804f
DB
1998 /* sanity check */
1999 if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
2000 {
2001 av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2002 srcW, srcH, dstW, dstH);
2003 return NULL;
2004 }
8b2fce0d
MN
2005 if(srcW > VOFW || dstW > VOFW){
2006 av_log(NULL, AV_LOG_ERROR, "swScaler: Compile time max width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
2007 return NULL;
2008 }
221b804f
DB
2009
2010 if (!dstFilter) dstFilter= &dummyFilter;
2011 if (!srcFilter) srcFilter= &dummyFilter;
2012
2013 c= av_mallocz(sizeof(SwsContext));
2014
2015 c->av_class = &sws_context_class;
2016 c->srcW= srcW;
2017 c->srcH= srcH;
2018 c->dstW= dstW;
2019 c->dstH= dstH;
2020 c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2021 c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2022 c->flags= flags;
2023 c->dstFormat= dstFormat;
2024 c->srcFormat= srcFormat;
2025 c->vRounder= 4* 0x0001000100010001ULL;
2026
2027 usesHFilter= usesVFilter= 0;
1b0a4572
BF
2028 if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
2029 if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
2030 if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
2031 if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
2032 if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
2033 if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
2034 if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
2035 if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
221b804f
DB
2036
2037 getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2038 getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2039
2040 // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
2041 if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2042
2043 // drop some chroma lines if the user wants it
2044 c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2045 c->chrSrcVSubSample+= c->vChrDrop;
2046
2047 // drop every 2. pixel for chroma calculation unless user wants full chroma
2048 if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2049 && srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8
2050 && srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4
dfb09bd1 2051 && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
2f60f629 2052 && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
221b804f
DB
2053 c->chrSrcHSubSample=1;
2054
2055 if (param){
2056 c->param[0] = param[0];
2057 c->param[1] = param[1];
2058 }else{
2059 c->param[0] =
2060 c->param[1] = SWS_PARAM_DEFAULT;
2061 }
2062
2063 c->chrIntHSubSample= c->chrDstHSubSample;
2064 c->chrIntVSubSample= c->chrSrcVSubSample;
2065
2066 // Note the -((-x)>>y) is so that we always round toward +inf.
2067 c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2068 c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2069 c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2070 c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2071
2072 sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2073
2074 /* unscaled special Cases */
6bc0c792 2075 if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
221b804f
DB
2076 {
2077 /* yv12_to_nv12 */
2078 if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2079 {
2080 c->swScale= PlanarToNV12Wrapper;
2081 }
9bde778e 2082#ifdef CONFIG_GPL
221b804f
DB
2083 /* yuv2bgr */
2084 if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
2085 {
2086 c->swScale= yuv2rgb_get_func_ptr(c);
2087 }
9bde778e 2088#endif
6a4970ab 2089
e5091488 2090 if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P)
221b804f
DB
2091 {
2092 c->swScale= yvu9toyv12Wrapper;
2093 }
2094
2095 /* bgr24toYV12 */
2096 if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P)
2097 c->swScale= bgr24toyv12Wrapper;
2098
2099 /* rgb/bgr -> rgb/bgr (no dither needed forms) */
2100 if ( (isBGR(srcFormat) || isRGB(srcFormat))
2101 && (isBGR(dstFormat) || isRGB(dstFormat))
2102 && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8
2103 && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8
2104 && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4
2105 && srcFormat != PIX_FMT_RGB4 && dstFormat != PIX_FMT_RGB4
2106 && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2107 && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2108 && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
9990e426
MN
2109 && dstFormat != PIX_FMT_RGB32_1
2110 && dstFormat != PIX_FMT_BGR32_1
736143c8 2111 && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
221b804f
DB
2112 c->swScale= rgb2rgbWrapper;
2113
a6100f39
BC
2114 if (srcFormat == PIX_FMT_YUV422P)
2115 {
2116 if (dstFormat == PIX_FMT_YUYV422)
2117 c->swScale= YUV422PToYuy2Wrapper;
2118 else if (dstFormat == PIX_FMT_UYVY422)
2119 c->swScale= YUV422PToUyvyWrapper;
2120 }
2121
221b804f
DB
2122 /* LQ converters if -sws 0 or -sws 4*/
2123 if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
221b804f 2124 /* yv12_to_yuy2 */
3b38f1c6 2125 if (srcFormat == PIX_FMT_YUV420P)
221b804f
DB
2126 {
2127 if (dstFormat == PIX_FMT_YUYV422)
2128 c->swScale= PlanarToYuy2Wrapper;
3b38f1c6 2129 else if (dstFormat == PIX_FMT_UYVY422)
221b804f
DB
2130 c->swScale= PlanarToUyvyWrapper;
2131 }
2132 }
ec22603f 2133
6634d0ef 2134#ifdef COMPILE_ALTIVEC
221b804f 2135 if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
3b38f1c6 2136 srcFormat == PIX_FMT_YUV420P) {
221b804f
DB
2137 // unscaled YV12 -> packed YUV, we want speed
2138 if (dstFormat == PIX_FMT_YUYV422)
2139 c->swScale= yv12toyuy2_unscaled_altivec;
3b38f1c6 2140 else if (dstFormat == PIX_FMT_UYVY422)
221b804f
DB
2141 c->swScale= yv12touyvy_unscaled_altivec;
2142 }
b71cf33c
RD
2143#endif
2144
221b804f
DB
2145 /* simple copy */
2146 if ( srcFormat == dstFormat
2147 || (isPlanarYUV(srcFormat) && isGray(dstFormat))
e5091488 2148 || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
221b804f 2149 {
2d35ae56
LB
2150 if (isPacked(c->srcFormat))
2151 c->swScale= packedCopy;
2152 else /* Planar YUV or gray */
2153 c->swScale= planarCopy;
221b804f
DB
2154 }
2155
2156 /* gray16{le,be} conversions */
2157 if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2158 {
2159 c->swScale= gray16togray;
2160 }
2161 if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2162 {
2163 c->swScale= graytogray16;
2164 }
2165 if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2166 {
2167 c->swScale= gray16swap;
2168 }
2169
1ebbfe15
MH
2170#ifdef ARCH_BFIN
2171 if (flags & SWS_CPU_CAPS_BFIN)
2172 ff_bfin_get_unscaled_swscale (c);
2173#endif
2174
221b804f
DB
2175 if (c->swScale){
2176 if (flags&SWS_PRINT_INFO)
4b0c30b7 2177 av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
221b804f
DB
2178 sws_format_name(srcFormat), sws_format_name(dstFormat));
2179 return c;
2180 }
2181 }
2182
2183 if (flags & SWS_CPU_CAPS_MMX2)
2184 {
2185 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2186 if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2187 {
2188 if (flags&SWS_PRINT_INFO)
4b0c30b7 2189 av_log(c, AV_LOG_INFO, "output Width is not a multiple of 32 -> no MMX2 scaler\n");
221b804f
DB
2190 }
2191 if (usesHFilter) c->canMMX2BeUsed=0;
2192 }
2193 else
2194 c->canMMX2BeUsed=0;
2195
2196 c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2197 c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2198
2199 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2200 // but only for the FAST_BILINEAR mode otherwise do correct scaling
2201 // n-2 is the last chrominance sample available
86bdf3fd 2202 // this is not perfect, but no one should notice the difference, the more correct variant
221b804f
DB
2203 // would be like the vertical one, but that would require some special code for the
2204 // first and last pixel
2205 if (flags&SWS_FAST_BILINEAR)
2206 {
2207 if (c->canMMX2BeUsed)
2208 {
2209 c->lumXInc+= 20;
2210 c->chrXInc+= 20;
2211 }
2212 //we don't use the x86asm scaler if mmx is available
2213 else if (flags & SWS_CPU_CAPS_MMX)
2214 {
2215 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2216 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2217 }
2218 }
2219
2220 /* precalculate horizontal scaler filter coefficients */
2221 {
2222 const int filterAlign=
2223 (flags & SWS_CPU_CAPS_MMX) ? 4 :
2224 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2225 1;
2226
2227 initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2228 srcW , dstW, filterAlign, 1<<14,
2229 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
2230 srcFilter->lumH, dstFilter->lumH, c->param);
2231 initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2232 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2233 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2234 srcFilter->chrH, dstFilter->chrH, c->param);
28bf81c9 2235
dbdae6ec 2236#define MAX_FUNNY_CODE_SIZE 10000
17c613ef 2237#if defined(COMPILE_MMX2)
77a416e8 2238// can't downscale !!!
221b804f
DB
2239 if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2240 {
113ef149 2241#ifdef MAP_ANONYMOUS
221b804f
DB
2242 c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2243 c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
38d5c282 2244#else
221b804f
DB
2245 c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2246 c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
38d5c282
AJ
2247#endif
2248
221b804f
DB
2249 c->lumMmx2Filter = av_malloc((dstW /8+8)*sizeof(int16_t));
2250 c->chrMmx2Filter = av_malloc((c->chrDstW /4+8)*sizeof(int16_t));
2251 c->lumMmx2FilterPos= av_malloc((dstW /2/8+8)*sizeof(int32_t));
2252 c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
b7dc6f66 2253
221b804f
DB
2254 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2255 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2256 }
17c613ef 2257#endif /* defined(COMPILE_MMX2) */
221b804f 2258 } // Init Horizontal stuff
28bf81c9
MN
2259
2260
2261
221b804f
DB
2262 /* precalculate vertical scaler filter coefficients */
2263 {
2264 const int filterAlign=
2265 (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2266 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2267 1;
8c266f0c 2268
221b804f 2269 initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
5fe4aad7 2270 srcH , dstH, filterAlign, (1<<12),
221b804f
DB
2271 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
2272 srcFilter->lumV, dstFilter->lumV, c->param);
2273 initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
5fe4aad7 2274 c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
221b804f
DB
2275 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2276 srcFilter->chrV, dstFilter->chrV, c->param);
d33d485e
AC
2277
2278#ifdef HAVE_ALTIVEC
221b804f
DB
2279 c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2280 c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2281
2282 for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2283 int j;
2284 short *p = (short *)&c->vYCoeffsBank[i];
2285 for (j=0;j<8;j++)
2286 p[j] = c->vLumFilter[i];
2287 }
2288
2289 for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2290 int j;
2291 short *p = (short *)&c->vCCoeffsBank[i];
2292 for (j=0;j<8;j++)
2293 p[j] = c->vChrFilter[i];
2294 }
d33d485e 2295#endif
221b804f
DB
2296 }
2297
2298 // Calculate Buffer Sizes so that they won't run out while handling these damn slices
2299 c->vLumBufSize= c->vLumFilterSize;
2300 c->vChrBufSize= c->vChrFilterSize;
2301 for (i=0; i<dstH; i++)
2302 {
2303 int chrI= i*c->chrDstH / dstH;
2304 int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
2305 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2306
2307 nextSlice>>= c->chrSrcVSubSample;
2308 nextSlice<<= c->chrSrcVSubSample;
2309 if (c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
e5091488 2310 c->vLumBufSize= nextSlice - c->vLumFilterPos[i];
221b804f
DB
2311 if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2312 c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2313 }
2314
2315 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2316 c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2317 c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2318 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2319 /* align at 16 bytes for AltiVec */
2320 for (i=0; i<c->vLumBufSize; i++)
8b2fce0d 2321 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
221b804f 2322 for (i=0; i<c->vChrBufSize; i++)
8b2fce0d 2323 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
221b804f
DB
2324
2325 //try to avoid drawing green stuff between the right end and the stride end
8b2fce0d
MN
2326 for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
2327
5a352b14 2328 assert(2*VOFW == VOF);
221b804f 2329
fcc402b1 2330 assert(c->chrDstH <= dstH);
221b804f
DB
2331
2332 if (flags&SWS_PRINT_INFO)
2333 {
28bf81c9 2334#ifdef DITHER1XBPP
3e62b7e3 2335 const char *dither= " dithered";
5521b193 2336#else
3e62b7e3 2337 const char *dither= "";
28bf81c9 2338#endif
221b804f 2339 if (flags&SWS_FAST_BILINEAR)
4b0c30b7 2340 av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
221b804f 2341 else if (flags&SWS_BILINEAR)
4b0c30b7 2342 av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
221b804f 2343 else if (flags&SWS_BICUBIC)
4b0c30b7 2344 av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
221b804f 2345 else if (flags&SWS_X)
4b0c30b7 2346 av_log(c, AV_LOG_INFO, "Experimental scaler, ");
221b804f 2347 else if (flags&SWS_POINT)
4b0c30b7 2348 av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
221b804f 2349 else if (flags&SWS_AREA)
4b0c30b7 2350 av_log(c, AV_LOG_INFO, "Area Averageing scaler, ");
221b804f 2351 else if (flags&SWS_BICUBLIN)
4b0c30b7 2352 av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
221b804f 2353 else if (flags&SWS_GAUSS)
4b0c30b7 2354 av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
221b804f 2355 else if (flags&SWS_SINC)
4b0c30b7 2356 av_log(c, AV_LOG_INFO, "Sinc scaler, ");
221b804f 2357 else if (flags&SWS_LANCZOS)
4b0c30b7 2358 av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
221b804f 2359 else if (flags&SWS_SPLINE)
4b0c30b7 2360 av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
221b804f 2361 else
4b0c30b7 2362 av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
221b804f
DB
2363
2364 if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2365 av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2366 sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2367 else
2368 av_log(c, AV_LOG_INFO, "from %s to %s ",
2369 sws_format_name(srcFormat), sws_format_name(dstFormat));
2370
2371 if (flags & SWS_CPU_CAPS_MMX2)
2372 av_log(c, AV_LOG_INFO, "using MMX2\n");
2373 else if (flags & SWS_CPU_CAPS_3DNOW)
2374 av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2375 else if (flags & SWS_CPU_CAPS_MMX)
2376 av_log(c, AV_LOG_INFO, "using MMX\n");
2377 else if (flags & SWS_CPU_CAPS_ALTIVEC)
2378 av_log(c, AV_LOG_INFO, "using AltiVec\n");
2379 else
2380 av_log(c, AV_LOG_INFO, "using C\n");
2381 }
2382
2383 if (flags & SWS_PRINT_INFO)
2384 {
2385 if (flags & SWS_CPU_CAPS_MMX)
2386 {
2387 if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
4b0c30b7 2388 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
221b804f
DB
2389 else
2390 {
2391 if (c->hLumFilterSize==4)
4b0c30b7 2392 av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n");
221b804f 2393 else if (c->hLumFilterSize==8)
4b0c30b7 2394 av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal luminance scaling\n");
221b804f 2395 else
4b0c30b7 2396 av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal luminance scaling\n");
221b804f
DB
2397
2398 if (c->hChrFilterSize==4)
4b0c30b7 2399 av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
221b804f 2400 else if (c->hChrFilterSize==8)
4b0c30b7 2401 av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
221b804f 2402 else
4b0c30b7 2403 av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
221b804f
DB
2404 }
2405 }
2406 else
2407 {
3d6a30d9 2408#if defined(ARCH_X86)
4b0c30b7 2409 av_log(c, AV_LOG_VERBOSE, "using X86-Asm scaler for horizontal scaling\n");
28bf81c9 2410#else
221b804f 2411 if (flags & SWS_FAST_BILINEAR)
4b0c30b7 2412 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
221b804f 2413 else
4b0c30b7 2414 av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
28bf81c9 2415#endif
221b804f
DB
2416 }
2417 if (isPlanarYUV(dstFormat))
2418 {
2419 if (c->vLumFilterSize==1)
4b0c30b7 2420 av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f 2421 else
4b0c30b7 2422 av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f
DB
2423 }
2424 else
2425 {
2426 if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
4b0c30b7
BC
2427 av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2428 " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f 2429 else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
4b0c30b7 2430 av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f 2431 else
4b0c30b7 2432 av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f
DB
2433 }
2434
2435 if (dstFormat==PIX_FMT_BGR24)
4b0c30b7 2436 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 Converter\n",
221b804f
DB
2437 (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2438 else if (dstFormat==PIX_FMT_RGB32)
4b0c30b7 2439 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f 2440 else if (dstFormat==PIX_FMT_BGR565)
4b0c30b7 2441 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f 2442 else if (dstFormat==PIX_FMT_BGR555)
4b0c30b7 2443 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
221b804f 2444
4b0c30b7 2445 av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
221b804f
DB
2446 }
2447 if (flags & SWS_PRINT_INFO)
2448 {
4b0c30b7 2449 av_log(c, AV_LOG_DEBUG, "Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
221b804f 2450 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
4b0c30b7 2451 av_log(c, AV_LOG_DEBUG, "Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
221b804f
DB
2452 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2453 }
2454
2455 c->swScale= getSwsFunc(flags);
2456 return c;
28bf81c9
MN
2457}
2458
2459/**
56b69633 2460 * swscale wrapper, so we don't need to export the SwsContext.
fccb9b2b
MN
2461 * assumes planar YUV to be in YUV order instead of YVU
2462 */
703b56fb 2463int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
2464 int srcSliceH, uint8_t* dst[], int dstStride[]){
2465 int i;
2466 uint8_t* src2[4]= {src[0], src[1], src[2]};
2467 uint32_t pal[256];
14623020
MN
2468 int use_pal= c->srcFormat == PIX_FMT_PAL8
2469 || c->srcFormat == PIX_FMT_BGR4_BYTE
2470 || c->srcFormat == PIX_FMT_RGB4_BYTE
2471 || c->srcFormat == PIX_FMT_BGR8
2472 || c->srcFormat == PIX_FMT_RGB8;
2473
221b804f 2474 if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
3f0bc115 2475 av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
221b804f
DB
2476 return 0;
2477 }
2478 if (c->sliceDir == 0) {
2479 if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2480 }
2481
14623020 2482 if (use_pal){
221b804f 2483 for (i=0; i<256; i++){
14623020
MN
2484 int p, r, g, b,y,u,v;
2485 if(c->srcFormat == PIX_FMT_PAL8){
2486 p=((uint32_t*)(src[1]))[i];
2487 r= (p>>16)&0xFF;
2488 g= (p>> 8)&0xFF;
2489 b= p &0xFF;
2490 }else if(c->srcFormat == PIX_FMT_RGB8){
2491 r= (i>>5 )*36;
2492 g= ((i>>2)&7)*36;
2493 b= (i&3 )*85;
2494 }else if(c->srcFormat == PIX_FMT_BGR8){
2495 b= (i>>6 )*85;
2496 g= ((i>>3)&7)*36;
2497 r= (i&7 )*36;
2498 }else if(c->srcFormat == PIX_FMT_RGB4_BYTE){
2499 r= (i>>3 )*255;
2500 g= ((i>>1)&3)*85;
2501 b= (i&1 )*255;
2502 }else if(c->srcFormat == PIX_FMT_BGR4_BYTE){
2503 b= (i>>3 )*255;
2504 g= ((i>>1)&3)*85;
2505 r= (i&1 )*255;
2506 }
cbcb408f
MN
2507 y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2508 u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2509 v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
221b804f 2510 pal[i]= y + (u<<8) + (v<<16);
21c08a3f 2511 }
74498eb4 2512 src2[1]= (uint8_t*)pal;
221b804f 2513 }
21c08a3f 2514
221b804f
DB
2515 // copy strides, so they can safely be modified
2516 if (c->sliceDir == 1) {
2517 // slices go from top to bottom
2518 int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
2519 int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
2520 return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2521 } else {
2522 // slices go from bottom to top => we flip the image internally
2523 uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
2524 dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2525 dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2526 int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2527 int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2528
2529 src2[0] += (srcSliceH-1)*srcStride[0];
14623020 2530 if (!use_pal)
221b804f
DB
2531 src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2532 src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2533
2534 return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2535 }
fccb9b2b
MN
2536}
2537
2538/**
56b69633 2539 * swscale wrapper, so we don't need to export the SwsContext
d4e24275 2540 */
703b56fb 2541int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
221b804f
DB
2542 int srcSliceH, uint8_t* dst[], int dstStride[]){
2543 return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
d4e24275 2544}
d4e24275 2545
6a4970ab 2546SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
221b804f
DB
2547 float lumaSharpen, float chromaSharpen,
2548 float chromaHShift, float chromaVShift,
2549 int verbose)
e21206a8 2550{
221b804f
DB
2551 SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2552
2553 if (lumaGBlur!=0.0){
2554 filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2555 filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2556 }else{
2557 filter->lumH= sws_getIdentityVec();
2558 filter->lumV= sws_getIdentityVec();
2559 }
2560
2561 if (chromaGBlur!=0.0){
2562 filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2563 filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2564 }else{
2565 filter->chrH= sws_getIdentityVec();
2566 filter->chrV= sws_getIdentityVec();
2567 }
2568
2569 if (chromaSharpen!=0.0){
2570 SwsVector *id= sws_getIdentityVec();
2571 sws_scaleVec(filter->chrH, -chromaSharpen);
2572 sws_scaleVec(filter->chrV, -chromaSharpen);
2573 sws_addVec(filter->chrH, id);
2574 sws_addVec(filter->chrV, id);
2575 sws_freeVec(id);
2576 }
2577
2578 if (lumaSharpen!=0.0){
2579 SwsVector *id= sws_getIdentityVec();
2580 sws_scaleVec(filter->lumH, -lumaSharpen);
2581 sws_scaleVec(filter->lumV, -lumaSharpen);
2582 sws_addVec(filter->lumH, id);
2583 sws_addVec(filter->lumV, id);
2584 sws_freeVec(id);
2585 }
2586
2587 if (chromaHShift != 0.0)
2588 sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2589
2590 if (chromaVShift != 0.0)
2591 sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2592
2593 sws_normalizeVec(filter->chrH, 1.0);
2594 sws_normalizeVec(filter->chrV, 1.0);
2595 sws_normalizeVec(filter->lumH, 1.0);
2596 sws_normalizeVec(filter->lumV, 1.0);
2597
2598 if (verbose) sws_printVec(filter->chrH);
2599 if (verbose) sws_printVec(filter->lumH);
2600
2601 return filter;
e21206a8
MN
2602}
2603
d4e24275 2604/**
28bf81c9
MN
2605 * returns a normalized gaussian curve used to filter stuff
2606 * quality=3 is high quality, lowwer is lowwer quality
2607 */
d4e24275 2608SwsVector *sws_getGaussianVec(double variance, double quality){
221b804f
DB
2609 const int length= (int)(variance*quality + 0.5) | 1;
2610 int i;
2611 double *coeff= av_malloc(length*sizeof(double));
2612 double middle= (length-1)*0.5;
2613 SwsVector *vec= av_malloc(sizeof(SwsVector));
c7f822d9 2614
221b804f
DB
2615 vec->coeff= coeff;
2616 vec->length= length;
28bf81c9 2617
221b804f
DB
2618 for (i=0; i<length; i++)
2619 {
2620 double dist= i-middle;
e5091488 2621 coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI);
221b804f 2622 }
28bf81c9 2623
221b804f 2624 sws_normalizeVec(vec, 1.0);
c7f822d9 2625
221b804f 2626 return vec;
28bf81c9
MN
2627}
2628
d4e24275 2629SwsVector *sws_getConstVec(double c, int length){
221b804f
DB
2630 int i;
2631 double *coeff= av_malloc(length*sizeof(double));
2632 SwsVector *vec= av_malloc(sizeof(SwsVector));
5521b193 2633
221b804f
DB
2634 vec->coeff= coeff;
2635 vec->length= length;
5521b193 2636
221b804f
DB
2637 for (i=0; i<length; i++)
2638 coeff[i]= c;
5521b193 2639
221b804f 2640 return vec;
5521b193
MN
2641}
2642
2643
d4e24275 2644SwsVector *sws_getIdentityVec(void){
221b804f 2645 return sws_getConstVec(1.0, 1);
c7f822d9
MN
2646}
2647
2e728364 2648double sws_dcVec(SwsVector *a){
221b804f
DB
2649 int i;
2650 double sum=0;
28bf81c9 2651
221b804f
DB
2652 for (i=0; i<a->length; i++)
2653 sum+= a->coeff[i];
28bf81c9 2654
221b804f 2655 return sum;
28bf81c9
MN
2656}
2657
d4e24275 2658void sws_scaleVec(SwsVector *a, double scalar){
221b804f 2659 int i;
c7f822d9 2660
221b804f
DB
2661 for (i=0; i<a->length; i++)
2662 a->coeff[i]*= scalar;
c7f822d9
MN
2663}
2664
2e728364 2665void sws_normalizeVec(SwsVector *a, double height){
221b804f 2666 sws_scaleVec(a, height/sws_dcVec(a));
2e728364
MN
2667}
2668
d4e24275 2669static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
221b804f
DB
2670 int length= a->length + b->length - 1;
2671 double *coeff= av_malloc(length*sizeof(double));
2672 int i, j;
2673 SwsVector *vec= av_malloc(sizeof(SwsVector));
c7f822d9 2674
221b804f
DB
2675 vec->coeff= coeff;
2676 vec->length= length;
28bf81c9 2677
221b804f 2678 for (i=0; i<length; i++) coeff[i]= 0.0;
28bf81c9 2679
221b804f
DB
2680 for (i=0; i<a->length; i++)
2681 {
2682 for (j=0; j<b->length; j++)
2683 {
2684 coeff[i+j]+= a->coeff[i]*b->coeff[j];
2685 }
2686 }
28bf81c9 2687
221b804f 2688 return vec;
28bf81c9
MN
2689}
2690
d4e24275 2691static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
221b804f
DB
2692 int length= FFMAX(a->length, b->length);
2693 double *coeff= av_malloc(length*sizeof(double));
2694 int i;
2695 SwsVector *vec= av_malloc(sizeof(SwsVector));
c7f822d9 2696
221b804f
DB
2697 vec->coeff= coeff;
2698 vec->length= length;
28bf81c9 2699
221b804f 2700 for (i=0; i<length; i++) coeff[i]= 0.0;
28bf81c9 2701
221b804f
DB
2702 for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2703 for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];
c7f822d9 2704
221b804f 2705 return vec;
28bf81c9 2706}
c7f822d9 2707
d4e24275 2708static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
221b804f
DB
2709 int length= FFMAX(a->length, b->length);
2710 double *coeff= av_malloc(length*sizeof(double));
2711 int i;
2712 SwsVector *vec= av_malloc(sizeof(SwsVector));
c7f822d9 2713
221b804f
DB
2714 vec->coeff= coeff;
2715 vec->length= length;
c7f822d9 2716
221b804f 2717 for (i=0; i<length; i++) coeff[i]= 0.0;
c7f822d9 2718
221b804f
DB
2719 for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2720 for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];
c7f822d9 2721
221b804f 2722 return vec;
c7f822d9
MN
2723}
2724
2725/* shift left / or right if "shift" is negative */
d4e24275 2726static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
221b804f
DB
2727 int length= a->length + FFABS(shift)*2;
2728 double *coeff= av_malloc(length*sizeof(double));
2729 int i;
2730 SwsVector *vec= av_malloc(sizeof(SwsVector));
c7f822d9 2731
221b804f
DB
2732 vec->coeff= coeff;
2733 vec->length= length;
c7f822d9 2734
221b804f 2735 for (i=0; i<length; i++) coeff[i]= 0.0;
c7f822d9 2736
221b804f
DB
2737 for (i=0; i<a->length; i++)
2738 {
2739 coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2740 }
c7f822d9 2741
221b804f 2742 return vec;
c7f822d9
MN
2743}
2744
d4e24275 2745void sws_shiftVec(SwsVector *a, int shift){
221b804f
DB
2746 SwsVector *shifted= sws_getShiftedVec(a, shift);
2747 av_free(a->coeff);
2748 a->coeff= shifted->coeff;
2749 a->length= shifted->length;
2750 av_free(shifted);
5cebb24b
MN
2751}
2752
d4e24275 2753void sws_addVec(SwsVector *a, SwsVector *b){
221b804f
DB
2754 SwsVector *sum= sws_sumVec(a, b);
2755 av_free(a->coeff);
2756 a->coeff= sum->coeff;
2757 a->length= sum->length;
2758 av_free(sum);
5cebb24b
MN
2759}
2760
d4e24275 2761void sws_subVec(SwsVector *a, SwsVector *b){
221b804f
DB
2762 SwsVector *diff= sws_diffVec(a, b);
2763 av_free(a->coeff);
2764 a->coeff= diff->coeff;
2765 a->length= diff->length;
2766 av_free(diff);
5cebb24b
MN
2767}
2768
d4e24275 2769void sws_convVec(SwsVector *a, SwsVector *b){
221b804f
DB
2770 SwsVector *conv= sws_getConvVec(a, b);
2771 av_free(a->coeff);
2772 a->coeff= conv->coeff;
2773 a->length= conv->length;
2774 av_free(conv);
5cebb24b
MN
2775}
2776
d4e24275 2777SwsVector *sws_cloneVec(SwsVector *a){
221b804f
DB
2778 double *coeff= av_malloc(a->length*sizeof(double));
2779 int i;
2780 SwsVector *vec= av_malloc(sizeof(SwsVector));
5cebb24b 2781
221b804f
DB
2782 vec->coeff= coeff;
2783 vec->length= a->length;
5cebb24b 2784
221b804f 2785 for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];
5cebb24b 2786
221b804f 2787 return vec;
5cebb24b
MN
2788}
2789
d4e24275 2790void sws_printVec(SwsVector *a){
221b804f
DB
2791 int i;
2792 double max=0;
2793 double min=0;
2794 double range;
2795
2796 for (i=0; i<a->length; i++)
2797 if (a->coeff[i]>max) max= a->coeff[i];
2798
2799 for (i=0; i<a->length; i++)
2800 if (a->coeff[i]<min) min= a->coeff[i];
2801
2802 range= max - min;
2803
2804 for (i=0; i<a->length; i++)
2805 {
2806 int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
2807 av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]);
2808 for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " ");
2809 av_log(NULL, AV_LOG_DEBUG, "|\n");
2810 }
c7f822d9
MN
2811}
2812
d4e24275 2813void sws_freeVec(SwsVector *a){
221b804f 2814 if (!a) return;
47b7382d 2815 av_freep(&a->coeff);
221b804f
DB
2816 a->length=0;
2817 av_free(a);
c7f822d9
MN
2818}
2819
e21206a8 2820void sws_freeFilter(SwsFilter *filter){
221b804f 2821 if (!filter) return;
e21206a8 2822
221b804f
DB
2823 if (filter->lumH) sws_freeVec(filter->lumH);
2824 if (filter->lumV) sws_freeVec(filter->lumV);
2825 if (filter->chrH) sws_freeVec(filter->chrH);
2826 if (filter->chrV) sws_freeVec(filter->chrV);
2827 av_free(filter);
e21206a8
MN
2828}
2829
2830
d4e24275 2831void sws_freeContext(SwsContext *c){
221b804f
DB
2832 int i;
2833 if (!c) return;
2834
2835 if (c->lumPixBuf)
2836 {
2837 for (i=0; i<c->vLumBufSize; i++)
47b7382d
MN
2838 av_freep(&c->lumPixBuf[i]);
2839 av_freep(&c->lumPixBuf);
221b804f
DB
2840 }
2841
2842 if (c->chrPixBuf)
2843 {
2844 for (i=0; i<c->vChrBufSize; i++)
47b7382d
MN
2845 av_freep(&c->chrPixBuf[i]);
2846 av_freep(&c->chrPixBuf);
221b804f
DB
2847 }
2848
47b7382d
MN
2849 av_freep(&c->vLumFilter);
2850 av_freep(&c->vChrFilter);
2851 av_freep(&c->hLumFilter);
2852 av_freep(&c->hChrFilter);
d33d485e 2853#ifdef HAVE_ALTIVEC
47b7382d
MN
2854 av_freep(&c->vYCoeffsBank);
2855 av_freep(&c->vCCoeffsBank);
d33d485e 2856#endif
c7f822d9 2857
47b7382d
MN
2858 av_freep(&c->vLumFilterPos);
2859 av_freep(&c->vChrFilterPos);
2860 av_freep(&c->hLumFilterPos);
2861 av_freep(&c->hChrFilterPos);
c7f822d9 2862
9bde778e 2863#if defined(ARCH_X86) && defined(CONFIG_GPL)
0ce5275d 2864#ifdef MAP_ANONYMOUS
221b804f
DB
2865 if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
2866 if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
38d5c282 2867#else
221b804f
DB
2868 av_free(c->funnyYCode);
2869 av_free(c->funnyUVCode);
38d5c282 2870#endif
221b804f
DB
2871 c->funnyYCode=NULL;
2872 c->funnyUVCode=NULL;
3d6a30d9 2873#endif /* defined(ARCH_X86) */
38d5c282 2874
47b7382d
MN
2875 av_freep(&c->lumMmx2Filter);
2876 av_freep(&c->chrMmx2Filter);
2877 av_freep(&c->lumMmx2FilterPos);
2878 av_freep(&c->chrMmx2FilterPos);
2879 av_freep(&c->yuvTable);
221b804f
DB
2880
2881 av_free(c);
c7f822d9
MN
2882}
2883
22e46959
VP
2884/**
2885 * Checks if context is valid or reallocs a new one instead.
2886 * If context is NULL, just calls sws_getContext() to get a new one.
2887 * Otherwise, checks if the parameters are the same already saved in context.
2888 * If that is the case, returns the current context.
2889 * Otherwise, frees context and gets a new one.
2890 *
2891 * Be warned that srcFilter, dstFilter are not checked, they are
2892 * asumed to remain valid.
2893 */
2894struct SwsContext *sws_getCachedContext(struct SwsContext *context,
221b804f
DB
2895 int srcW, int srcH, int srcFormat,
2896 int dstW, int dstH, int dstFormat, int flags,
2897 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
22e46959 2898{
337a2a45
KMH
2899 static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};
2900
2901 if (!param)
2902 param = default_param;
2903
1b0a4572 2904 if (context) {
b8db4e22
BF
2905 if (context->srcW != srcW || context->srcH != srcH ||
2906 context->srcFormat != srcFormat ||
2907 context->dstW != dstW || context->dstH != dstH ||
2908 context->dstFormat != dstFormat || context->flags != flags ||
2909 context->param[0] != param[0] || context->param[1] != param[1])
22e46959
VP
2910 {
2911 sws_freeContext(context);
2912 context = NULL;
2913 }
2914 }
1b0a4572 2915 if (!context) {
22e46959 2916 return sws_getContext(srcW, srcH, srcFormat,
221b804f
DB
2917 dstW, dstH, dstFormat, flags,
2918 srcFilter, dstFilter, param);
22e46959
VP
2919 }
2920 return context;
2921}
2922