cleanup
[libav.git] / postproc / swscale.c
CommitLineData
fe8054c0
MN
1/*
2 Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at>
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
31190492 8
fe8054c0
MN
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
31190492 13
fe8054c0
MN
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
783e9cc9 18
28bf81c9 19/*
b72034dd 20 supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8, Y800
44c1035c 21 supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32, Y8, Y800
6c7506de 22 BGR15/16 support dithering
a861d4d7
MN
23
24 unscaled special converters
25 YV12/I420/IYUV -> BGR15/BGR16/BGR24/BGR32
26 YV12/I420/IYUV -> YV12/I420/IYUV
27 YUY2/BGR15/BGR16/BGR24/BGR32/RGB24/RGB32 -> same format
b935781b
MN
28 BGR24 -> BGR32 & RGB24 -> RGB32
29 BGR32 -> BGR24 & RGB32 -> RGB24
4bb3fa5e 30 BGR15 -> BGR16
b935781b
MN
31*/
32
33/*
34tested special converters
35 YV12/I420 -> BGR16
36 YV12 -> YV12
4bb3fa5e 37 BGR15 -> BGR16
1e1c4fe9 38 BGR16 -> BGR16
b935781b
MN
39
40untested special converters
1e1c4fe9
MN
41 YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
42 YV12/I420 -> YV12/I420
43 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
b935781b
MN
44 BGR24 -> BGR32 & RGB24 -> RGB32
45 BGR32 -> BGR24 & RGB32 -> RGB24
ec22603f 46 BGR24 -> YV12
28bf81c9
MN
47*/
48
d3f41512 49#include <inttypes.h>
dda87e9f 50#include <string.h>
077ea8a7 51#include <math.h>
c1b0bfb4 52#include <stdio.h>
d3f41512 53#include "../config.h"
9b464428 54#include "../mangle.h"
81b7c056 55#include <assert.h>
c1b0bfb4
MN
56#ifdef HAVE_MALLOC_H
57#include <malloc.h>
b6acbc3c
BS
58#else
59#include <stdlib.h>
c1b0bfb4 60#endif
d604bab9 61#include "swscale.h"
7630f2e0 62#include "../cpudetect.h"
a861d4d7 63#include "../bswap.h"
28bf81c9 64#include "../libvo/img_format.h"
37079906 65#include "rgb2rgb.h"
b0db4198 66#include "../libvo/fastmemcpy.h"
4a53a912 67#include "../mp_msg.h"
0d9f3d85
A
68
69#define MSG_WARN(args...) mp_msg(MSGT_SWS,MSGL_WARN, ##args )
70#define MSG_FATAL(args...) mp_msg(MSGT_SWS,MSGL_FATAL, ##args )
71#define MSG_ERR(args...) mp_msg(MSGT_SWS,MSGL_ERR, ##args )
72#define MSG_V(args...) mp_msg(MSGT_SWS,MSGL_V, ##args )
73#define MSG_DBG2(args...) mp_msg(MSGT_SWS,MSGL_DBG2, ##args )
74#define MSG_INFO(args...) mp_msg(MSGT_SWS,MSGL_INFO, ##args )
75
541c4eb9 76#undef MOVNTQ
7d7f78b5 77#undef PAVGB
d3f41512 78
783e9cc9 79//#undef HAVE_MMX2
7f56a527 80//#define HAVE_3DNOW
d3f41512 81//#undef HAVE_MMX
783e9cc9 82//#undef ARCH_X86
2ba1bff0 83//#define WORDS_BIGENDIAN
d604bab9 84#define DITHER1XBPP
d3f41512 85
ac6a2e45
MN
86#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
87
1e621b18 88#define RET 0xC3 //near return opcode for X86
c1b0bfb4 89
28bf81c9 90#ifdef MP_DEBUG
81b7c056 91#define ASSERT(x) assert(x);
28bf81c9 92#else
c1b0bfb4 93#define ASSERT(x) ;
28bf81c9
MN
94#endif
95
96#ifdef M_PI
97#define PI M_PI
98#else
99#define PI 3.14159265358979323846
100#endif
c1b0bfb4 101
6c7506de 102//FIXME replace this with something faster
44c1035c
MN
103#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YVU9)
104#define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x))
44c1035c 105#define isGray(x) ((x)==IMGFMT_Y800)
6ff0ad6b 106#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \
b72034dd 107 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
a861d4d7 108 || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
6ff0ad6b
MN
109 || (x)==IMGFMT_Y800)
110#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \
0d9f3d85 111 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
0d9f3d85 112 || (x)==IMGFMT_Y800)
44c1035c
MN
113#define isRGB(x) (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
114#define isBGR(x) (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
115#define isPacked(x) ((x)==IMGFMT_YUY2 || isRGB(x) || isBGR(x))
6ff0ad6b
MN
116
117#define RGB2YUV_SHIFT 16
1e621b18
MN
118#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
119#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
120#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
121#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
122#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
123#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
124#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
125#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
126#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
6c7506de 127
e3d2500f 128extern int verbose; // defined in mplayer.c
783e9cc9
MN
129/*
130NOTES
d604bab9 131Special versions: fast Y 1:1 scaling (no interpolation in y direction)
31190492 132
783e9cc9 133TODO
d604bab9 134more intelligent misalignment avoidance for the horizontal scaler
c1b0bfb4
MN
135write special vertical cubic upscale version
136Optimize C code (yv12 / minmax)
ff7ba856 137add support for packed pixel yuv input & output
6ff0ad6b
MN
138add support for Y8 output
139optimize bgr24 & bgr32
ff7ba856 140add BGR4 output support
1e621b18 141write special BGR->BGR scaler
37079906 142deglobalize yuv2rgb*.c
783e9cc9 143*/
31190492 144
d604bab9 145#define ABS(a) ((a) > 0 ? (a) : (-(a)))
2ff198c1
MN
146#define MIN(a,b) ((a) > (b) ? (b) : (a))
147#define MAX(a,b) ((a) < (b) ? (b) : (a))
d604bab9 148
7630f2e0
MN
149#ifdef ARCH_X86
150#define CAN_COMPILE_X86_ASM
d604bab9
MN
151#endif
152
7630f2e0 153#ifdef CAN_COMPILE_X86_ASM
d604bab9 154static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL;
390b20a6
MN
155static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL;
156static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL;
157static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL;
158static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL;
f62255fb
MN
159static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
160static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
d604bab9
MN
161static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL;
162static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL;
163static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
077ea8a7 164static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL;
d604bab9
MN
165static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
166static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
167static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
6ff0ad6b 168static uint64_t __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
d604bab9 169
3fe8e8f0
MN
170static volatile uint64_t __attribute__((aligned(8))) b5Dither;
171static volatile uint64_t __attribute__((aligned(8))) g5Dither;
172static volatile uint64_t __attribute__((aligned(8))) g6Dither;
173static volatile uint64_t __attribute__((aligned(8))) r5Dither;
d8fa3c54
MN
174
175static uint64_t __attribute__((aligned(8))) dither4[2]={
176 0x0103010301030103LL,
177 0x0200020002000200LL,};
178
179static uint64_t __attribute__((aligned(8))) dither8[2]={
180 0x0602060206020602LL,
181 0x0004000400040004LL,};
d604bab9
MN
182
183static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
184static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
185static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
186static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
187static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
188static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
189
99d2cb72
MN
190static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
191static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
192static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
193
ac6a2e45
MN
194#ifdef FAST_BGR2YV12
195static const uint64_t bgr2YCoeff __attribute__((aligned(8))) = 0x000000210041000DULL;
4342fc14
MN
196static const uint64_t bgr2UCoeff __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
197static const uint64_t bgr2VCoeff __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
ac6a2e45
MN
198#else
199static const uint64_t bgr2YCoeff __attribute__((aligned(8))) = 0x000020E540830C8BULL;
4342fc14
MN
200static const uint64_t bgr2UCoeff __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
201static const uint64_t bgr2VCoeff __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
ac6a2e45
MN
202#endif
203static const uint64_t bgr2YOffset __attribute__((aligned(8))) = 0x1010101010101010ULL;
4342fc14 204static const uint64_t bgr2UVOffset __attribute__((aligned(8)))= 0x8080808080808080ULL;
ac6a2e45
MN
205static const uint64_t w1111 __attribute__((aligned(8))) = 0x0001000100010001ULL;
206
28bf81c9 207// FIXME remove
d604bab9
MN
208static uint64_t __attribute__((aligned(8))) asm_yalpha1;
209static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
d604bab9 210#endif
783e9cc9
MN
211
212// clipping helper table for C implementations:
213static unsigned char clip_table[768];
214
b18ea156
MN
215static unsigned short clip_table16b[768];
216static unsigned short clip_table16g[768];
217static unsigned short clip_table16r[768];
218static unsigned short clip_table15b[768];
219static unsigned short clip_table15g[768];
220static unsigned short clip_table15r[768];
221
783e9cc9
MN
222// yuv->rgb conversion tables:
223static int yuvtab_2568[256];
224static int yuvtab_3343[256];
225static int yuvtab_0c92[256];
226static int yuvtab_1a1e[256];
227static int yuvtab_40cf[256];
c1b0bfb4
MN
228// Needed for cubic scaler to catch overflows
229static int clip_yuvtab_2568[768];
230static int clip_yuvtab_3343[768];
231static int clip_yuvtab_0c92[768];
232static int clip_yuvtab_1a1e[768];
233static int clip_yuvtab_40cf[768];
234
28bf81c9 235//global sws_flags from the command line
1f347f22 236int sws_flags=2;
077ea8a7 237
5cebb24b
MN
238//global srcFilter
239SwsFilter src_filter= {NULL, NULL, NULL, NULL};
240
241float sws_lum_gblur= 0.0;
242float sws_chr_gblur= 0.0;
243int sws_chr_vshift= 0;
244int sws_chr_hshift= 0;
5521b193
MN
245float sws_chr_sharpen= 0.0;
246float sws_lum_sharpen= 0.0;
5cebb24b 247
28bf81c9
MN
248/* cpuCaps combined from cpudetect and whats actually compiled in
249 (if there is no support for something compiled in it wont appear here) */
250static CpuCaps cpuCaps;
d3f41512 251
28bf81c9
MN
252void (*swScale)(SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
253 int srcSliceH, uint8_t* dst[], int dstStride[])=NULL;
2ff198c1 254
5cebb24b
MN
255static SwsVector *getConvVec(SwsVector *a, SwsVector *b);
256
7630f2e0 257#ifdef CAN_COMPILE_X86_ASM
96034638
MN
258void in_asm_used_var_warning_killer()
259{
077ea8a7 260 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
28bf81c9 261 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+
6ff0ad6b 262 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
96034638
MN
263 if(i) i=0;
264}
265#endif
d604bab9 266
c7a810cc
MN
267static int testFormat[]={
268IMGFMT_YV12,
269//IMGFMT_IYUV,
270IMGFMT_I420,
271IMGFMT_BGR15,
272IMGFMT_BGR16,
273IMGFMT_BGR24,
274IMGFMT_BGR32,
275//IMGFMT_Y8,
276IMGFMT_Y800,
277//IMGFMT_YUY2,
2780
279};
280
/**
 * Sum of squared differences between two 8 bit sample planes.
 *
 * @param src1    first plane (only read)
 * @param src2    second plane (only read)
 * @param stride1 distance in bytes between the starts of two rows of src1
 * @param stride2 distance in bytes between the starts of two rows of src2
 * @param w       number of samples compared per row
 * @param h       number of rows compared
 * @return sum over all w*h positions of (src1 - src2)^2, 0 if w or h is <= 0
 */
static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2, int stride1, int stride2, int w, int h){
	uint64_t ssd=0;
	int x,y;

	for(y=0; y<h; y++){
		// row offset is computed in pointer width so that y*stride cannot overflow int
		const uint8_t *row1= src1 + (intptr_t)y*stride1;
		const uint8_t *row2= src2 + (intptr_t)y*stride2;

		for(x=0; x<w; x++){
			int d= row1[x] - row2[x];
			ssd+= (uint64_t)(d*d); // |d| <= 255, so d*d always fits in int
		}
	}
	return ssd;
}
293
294// test by ref -> src -> dst -> out & compare out against ref
295// ref & out are YV12
296static void doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat,
297 int srcW, int srcH, int dstW, int dstH, int flags){
298 uint8_t *src[3];
299 uint8_t *dst[3];
300 uint8_t *out[3];
301 int srcStride[3], dstStride[3];
302 int i;
303 uint64_t ssdY, ssdU, ssdV;
304 SwsContext *srcContext, *dstContext, *outContext;
305
306 for(i=0; i<3; i++){
307 srcStride[i]= srcW*4;
308 dstStride[i]= dstW*4;
309 src[i]= malloc(srcStride[i]*srcH);
310 dst[i]= malloc(dstStride[i]*dstH);
311 out[i]= malloc(refStride[i]*h);
312 }
313
314 srcContext= getSwsContext(w, h, IMGFMT_YV12, srcW, srcH, srcFormat, flags, NULL, NULL);
315 dstContext= getSwsContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL);
316 outContext= getSwsContext(dstW, dstH, dstFormat, w, h, IMGFMT_YV12, flags, NULL, NULL);
317 if(srcContext==NULL ||dstContext==NULL ||outContext==NULL){
318 printf("Failed allocating swsContext\n");
319 goto end;
320 }
321// printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2],
322// (int)src[0], (int)src[1], (int)src[2]);
323
324 srcContext->swScale(srcContext, ref, refStride, 0, h , src, srcStride);
325 dstContext->swScale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
326 outContext->swScale(outContext, dst, dstStride, 0, dstH, out, refStride);
327
328 ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
329 ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
330 ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);
331
332 if(isGray(srcFormat) || isGray(dstFormat)) ssdU=ssdV=0; //FIXME check that output is really gray
333
334 ssdY/= w*h;
335 ssdU/= w*h/4;
336 ssdV/= w*h/4;
337
338 if(ssdY>10 || ssdU>10 || ssdV>10){
339 printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n",
340 vo_format_name(srcFormat), srcW, srcH,
341 vo_format_name(dstFormat), dstW, dstH,
342 flags,
343 ssdY, ssdU, ssdV);
344 }
345
346 end:
347
348 freeSwsContext(srcContext);
349 freeSwsContext(dstContext);
350 freeSwsContext(outContext);
351
352 for(i=0; i<3; i++){
353 free(src[i]);
354 free(dst[i]);
355 free(out[i]);
356 }
357}
358
359static void selfTest(uint8_t *src[3], int stride[3], int w, int h){
360 int srcFormat, dstFormat, srcFormatIndex, dstFormatIndex;
361 int srcW, srcH, dstW, dstH;
362 int flags;
363
364 for(srcFormatIndex=0; ;srcFormatIndex++){
365 srcFormat= testFormat[srcFormatIndex];
366 if(!srcFormat) break;
367 for(dstFormatIndex=0; ;dstFormatIndex++){
368 dstFormat= testFormat[dstFormatIndex];
369 if(!dstFormat) break;
370 if(!isSupportedOut(dstFormat)) continue;
371
372 srcW= w+w/3;
373 srcH= h+h/3;
374 for(dstW=w; dstW<w*2; dstW+= dstW/3){
375 for(dstH=h; dstH<h*2; dstH+= dstH/3){
376 for(flags=1; flags<33; flags*=2)
377 doTest(src, stride, w, h, srcFormat, dstFormat,
378 srcW, srcH, dstW, dstH, flags);
379 }
380 }
381 }
382 }
383}
384
e3d2500f
MN
/**
 * Plain C vertical filter producing planar 8 bit output.
 *
 * Every output sample is the sum of filterSize input lines weighted by the
 * matching filter coefficient, shifted right by 19 bits and clamped to 0..255.
 *
 * @param lumFilter     lumFilterSize luma coefficients
 * @param lumSrc        lumFilterSize luma input lines
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chrFilterSize chroma coefficients
 * @param chrSrc        chrFilterSize chroma input lines; the V samples of a
 *                      line are read 2048 entries after its U samples
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW samples are written
 * @param uDest         U output, dstW>>1 samples are written; if NULL no
 *                      chroma is written at all
 * @param vDest         V output, dstW>>1 samples are written (not checked
 *                      against NULL separately)
 * @param dstW          luma width of the output line
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW)
{
	//FIXME Optimize (just quickly written, not optimized)
	const int chrW= dstW>>1;
	int x, tap;

	/* luma */
	for(x=0; x<dstW; x++)
	{
		int acc=0;

		for(tap=0; tap<lumFilterSize; tap++)
			acc += lumSrc[tap][x] * lumFilter[tap];

		acc >>= 19;
		dest[x]= MIN(MAX(acc, 0), 255);
	}

	/* chroma is optional */
	if(uDest == NULL) return;

	for(x=0; x<chrW; x++)
	{
		int accU=0;
		int accV=0;

		for(tap=0; tap<chrFilterSize; tap++)
		{
			int16_t *line= chrSrc[tap];
			int coeff= chrFilter[tap];

			accU += line[x       ] * coeff;
			accV += line[x + 2048] * coeff;
		}

		accU >>= 19;
		accV >>= 19;
		uDest[x]= MIN(MAX(accU, 0), 255);
		vDest[x]= MIN(MAX(accV, 0), 255);
	}
}
417
418static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
419 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
28bf81c9 420 uint8_t *dest, int dstW, int dstFormat)
e3d2500f 421{
28bf81c9 422 if(dstFormat==IMGFMT_BGR32)
e3d2500f 423 {
2ba1bff0 424 int i;
df3c183a
MN
425#ifdef WORDS_BIGENDIAN
426 dest++;
427#endif
e3d2500f
MN
428 for(i=0; i<(dstW>>1); i++){
429 int j;
430 int Y1=0;
431 int Y2=0;
432 int U=0;
433 int V=0;
434 int Cb, Cr, Cg;
435 for(j=0; j<lumFilterSize; j++)
436 {
437 Y1 += lumSrc[j][2*i] * lumFilter[j];
438 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
439 }
440 for(j=0; j<chrFilterSize; j++)
441 {
442 U += chrSrc[j][i] * chrFilter[j];
443 V += chrSrc[j][i+2048] * chrFilter[j];
444 }
445 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
446 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
447 U >>= 19;
448 V >>= 19;
449
450 Cb= clip_yuvtab_40cf[U+ 256];
451 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
452 Cr= clip_yuvtab_3343[V+ 256];
453
454 dest[8*i+0]=clip_table[((Y1 + Cb) >>13)];
455 dest[8*i+1]=clip_table[((Y1 + Cg) >>13)];
456 dest[8*i+2]=clip_table[((Y1 + Cr) >>13)];
457
458 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)];
459 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)];
460 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)];
461 }
462 }
28bf81c9 463 else if(dstFormat==IMGFMT_BGR24)
e3d2500f
MN
464 {
465 int i;
466 for(i=0; i<(dstW>>1); i++){
467 int j;
468 int Y1=0;
469 int Y2=0;
470 int U=0;
471 int V=0;
472 int Cb, Cr, Cg;
473 for(j=0; j<lumFilterSize; j++)
474 {
475 Y1 += lumSrc[j][2*i] * lumFilter[j];
476 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
477 }
478 for(j=0; j<chrFilterSize; j++)
479 {
480 U += chrSrc[j][i] * chrFilter[j];
481 V += chrSrc[j][i+2048] * chrFilter[j];
482 }
483 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
484 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
485 U >>= 19;
486 V >>= 19;
487
488 Cb= clip_yuvtab_40cf[U+ 256];
489 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
490 Cr= clip_yuvtab_3343[V+ 256];
491
492 dest[0]=clip_table[((Y1 + Cb) >>13)];
493 dest[1]=clip_table[((Y1 + Cg) >>13)];
494 dest[2]=clip_table[((Y1 + Cr) >>13)];
495
496 dest[3]=clip_table[((Y2 + Cb) >>13)];
497 dest[4]=clip_table[((Y2 + Cg) >>13)];
498 dest[5]=clip_table[((Y2 + Cr) >>13)];
499 dest+=6;
500 }
501 }
28bf81c9 502 else if(dstFormat==IMGFMT_BGR16)
e3d2500f
MN
503 {
504 int i;
5521b193
MN
505#ifdef DITHER1XBPP
506 static int ditherb1=1<<14;
507 static int ditherg1=1<<13;
508 static int ditherr1=2<<14;
509 static int ditherb2=3<<14;
510 static int ditherg2=3<<13;
511 static int ditherr2=0<<14;
512
513 ditherb1 ^= (1^2)<<14;
514 ditherg1 ^= (1^2)<<13;
515 ditherr1 ^= (1^2)<<14;
516 ditherb2 ^= (3^0)<<14;
517 ditherg2 ^= (3^0)<<13;
518 ditherr2 ^= (3^0)<<14;
519#else
520 const int ditherb1=0;
521 const int ditherg1=0;
522 const int ditherr1=0;
523 const int ditherb2=0;
524 const int ditherg2=0;
525 const int ditherr2=0;
526#endif
e3d2500f
MN
527 for(i=0; i<(dstW>>1); i++){
528 int j;
529 int Y1=0;
530 int Y2=0;
531 int U=0;
532 int V=0;
533 int Cb, Cr, Cg;
534 for(j=0; j<lumFilterSize; j++)
535 {
536 Y1 += lumSrc[j][2*i] * lumFilter[j];
537 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
538 }
539 for(j=0; j<chrFilterSize; j++)
540 {
541 U += chrSrc[j][i] * chrFilter[j];
542 V += chrSrc[j][i+2048] * chrFilter[j];
543 }
544 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
545 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
546 U >>= 19;
547 V >>= 19;
548
549 Cb= clip_yuvtab_40cf[U+ 256];
550 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
551 Cr= clip_yuvtab_3343[V+ 256];
552
553 ((uint16_t*)dest)[2*i] =
5521b193
MN
554 clip_table16b[(Y1 + Cb + ditherb1) >>13] |
555 clip_table16g[(Y1 + Cg + ditherg1) >>13] |
556 clip_table16r[(Y1 + Cr + ditherr1) >>13];
e3d2500f
MN
557
558 ((uint16_t*)dest)[2*i+1] =
5521b193
MN
559 clip_table16b[(Y2 + Cb + ditherb2) >>13] |
560 clip_table16g[(Y2 + Cg + ditherg2) >>13] |
561 clip_table16r[(Y2 + Cr + ditherr2) >>13];
e3d2500f
MN
562 }
563 }
28bf81c9 564 else if(dstFormat==IMGFMT_BGR15)
e3d2500f
MN
565 {
566 int i;
5521b193
MN
567#ifdef DITHER1XBPP
568 static int ditherb1=1<<14;
569 static int ditherg1=1<<14;
570 static int ditherr1=2<<14;
571 static int ditherb2=3<<14;
572 static int ditherg2=3<<14;
573 static int ditherr2=0<<14;
574
575 ditherb1 ^= (1^2)<<14;
576 ditherg1 ^= (1^2)<<14;
577 ditherr1 ^= (1^2)<<14;
578 ditherb2 ^= (3^0)<<14;
579 ditherg2 ^= (3^0)<<14;
580 ditherr2 ^= (3^0)<<14;
581#else
582 const int ditherb1=0;
583 const int ditherg1=0;
584 const int ditherr1=0;
585 const int ditherb2=0;
586 const int ditherg2=0;
587 const int ditherr2=0;
588#endif
e3d2500f
MN
589 for(i=0; i<(dstW>>1); i++){
590 int j;
591 int Y1=0;
592 int Y2=0;
593 int U=0;
594 int V=0;
595 int Cb, Cr, Cg;
596 for(j=0; j<lumFilterSize; j++)
597 {
598 Y1 += lumSrc[j][2*i] * lumFilter[j];
599 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
600 }
601 for(j=0; j<chrFilterSize; j++)
602 {
603 U += chrSrc[j][i] * chrFilter[j];
604 V += chrSrc[j][i+2048] * chrFilter[j];
605 }
606 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
607 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
608 U >>= 19;
609 V >>= 19;
610
611 Cb= clip_yuvtab_40cf[U+ 256];
612 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
613 Cr= clip_yuvtab_3343[V+ 256];
614
615 ((uint16_t*)dest)[2*i] =
5521b193
MN
616 clip_table15b[(Y1 + Cb + ditherb1) >>13] |
617 clip_table15g[(Y1 + Cg + ditherg1) >>13] |
618 clip_table15r[(Y1 + Cr + ditherr1) >>13];
e3d2500f
MN
619
620 ((uint16_t*)dest)[2*i+1] =
5521b193
MN
621 clip_table15b[(Y2 + Cb + ditherb2) >>13] |
622 clip_table15g[(Y2 + Cg + ditherg2) >>13] |
623 clip_table15r[(Y2 + Cr + ditherr2) >>13];
e3d2500f
MN
624 }
625 }
626}
627
628
7630f2e0
MN
629//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
630//Plain C versions
726a959a
MN
631#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
632#define COMPILE_C
633#endif
634
635#ifdef CAN_COMPILE_X86_ASM
636
637#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
638#define COMPILE_MMX
639#endif
640
641#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
642#define COMPILE_MMX2
643#endif
644
645#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
646#define COMPILE_3DNOW
647#endif
648#endif //CAN_COMPILE_X86_ASM
649
650#undef HAVE_MMX
651#undef HAVE_MMX2
652#undef HAVE_3DNOW
726a959a
MN
653
654#ifdef COMPILE_C
7630f2e0
MN
655#undef HAVE_MMX
656#undef HAVE_MMX2
657#undef HAVE_3DNOW
7630f2e0
MN
658#define RENAME(a) a ## _C
659#include "swscale_template.c"
726a959a 660#endif
397c035e 661
7630f2e0 662#ifdef CAN_COMPILE_X86_ASM
397c035e 663
7630f2e0
MN
664//X86 versions
665/*
666#undef RENAME
667#undef HAVE_MMX
668#undef HAVE_MMX2
669#undef HAVE_3DNOW
670#define ARCH_X86
671#define RENAME(a) a ## _X86
672#include "swscale_template.c"
1faf0867 673*/
7630f2e0 674//MMX versions
726a959a 675#ifdef COMPILE_MMX
7630f2e0
MN
676#undef RENAME
677#define HAVE_MMX
678#undef HAVE_MMX2
679#undef HAVE_3DNOW
7630f2e0
MN
680#define RENAME(a) a ## _MMX
681#include "swscale_template.c"
726a959a 682#endif
7630f2e0
MN
683
684//MMX2 versions
726a959a 685#ifdef COMPILE_MMX2
7630f2e0
MN
686#undef RENAME
687#define HAVE_MMX
688#define HAVE_MMX2
689#undef HAVE_3DNOW
7630f2e0
MN
690#define RENAME(a) a ## _MMX2
691#include "swscale_template.c"
726a959a 692#endif
7630f2e0
MN
693
694//3DNOW versions
726a959a 695#ifdef COMPILE_3DNOW
7630f2e0
MN
696#undef RENAME
697#define HAVE_MMX
698#undef HAVE_MMX2
699#define HAVE_3DNOW
7630f2e0
MN
700#define RENAME(a) a ## _3DNow
701#include "swscale_template.c"
726a959a 702#endif
7630f2e0
MN
703
704#endif //CAN_COMPILE_X86_ASM
705
706// minor note: the HAVE_xyz macros are messed up after this point, so don't use them below
d604bab9 707
d3f41512 708
6c7506de 709// old global scaler, dont use for new code
28bf81c9
MN
710// will use sws_flags from the command line
711void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
712 int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp,
d1fac6cf 713 int srcW, int srcH, int dstW, int dstH){
31190492 714
28bf81c9
MN
715 static SwsContext *context=NULL;
716 int dstFormat;
28bf81c9
MN
717 int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1};
718
6c7506de 719 switch(dstbpp)
28bf81c9 720 {
6c7506de
MN
721 case 8 : dstFormat= IMGFMT_Y8; break;
722 case 12: dstFormat= IMGFMT_YV12; break;
723 case 15: dstFormat= IMGFMT_BGR15; break;
724 case 16: dstFormat= IMGFMT_BGR16; break;
725 case 24: dstFormat= IMGFMT_BGR24; break;
726 case 32: dstFormat= IMGFMT_BGR32; break;
727 default: return;
728 }
729
730 if(!context) context=getSwsContextFromCmdLine(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat);
731
b6654a54 732 context->swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3);
6c7506de
MN
733}
734
735// will use sws_flags & src_filter (from cmd line)
736SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat)
737{
738 int flags=0;
739 static int firstTime=1;
740
5521b193 741#ifdef ARCH_X86
6c7506de
MN
742 if(gCpuCaps.hasMMX)
743 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
5521b193 744#endif
6c7506de
MN
745 if(firstTime)
746 {
28bf81c9 747 firstTime=0;
6c7506de
MN
748 flags= SWS_PRINT_INFO;
749 }
750 else if(verbose>1) flags= SWS_PRINT_INFO;
751
752 if(src_filter.lumH) freeVec(src_filter.lumH);
753 if(src_filter.lumV) freeVec(src_filter.lumV);
754 if(src_filter.chrH) freeVec(src_filter.chrH);
755 if(src_filter.chrV) freeVec(src_filter.chrV);
756
757 if(sws_lum_gblur!=0.0){
758 src_filter.lumH= getGaussianVec(sws_lum_gblur, 3.0);
759 src_filter.lumV= getGaussianVec(sws_lum_gblur, 3.0);
760 }else{
761 src_filter.lumH= getIdentityVec();
762 src_filter.lumV= getIdentityVec();
763 }
c7f822d9 764
6c7506de
MN
765 if(sws_chr_gblur!=0.0){
766 src_filter.chrH= getGaussianVec(sws_chr_gblur, 3.0);
767 src_filter.chrV= getGaussianVec(sws_chr_gblur, 3.0);
768 }else{
769 src_filter.chrH= getIdentityVec();
770 src_filter.chrV= getIdentityVec();
771 }
5521b193 772
6c7506de
MN
773 if(sws_chr_sharpen!=0.0){
774 SwsVector *g= getConstVec(-1.0, 3);
775 SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1);
776 g->coeff[1]=2.0;
777 addVec(id, g);
778 convVec(src_filter.chrH, id);
779 convVec(src_filter.chrV, id);
780 freeVec(g);
781 freeVec(id);
782 }
5521b193 783
6c7506de
MN
784 if(sws_lum_sharpen!=0.0){
785 SwsVector *g= getConstVec(-1.0, 3);
786 SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1);
787 g->coeff[1]=2.0;
788 addVec(id, g);
789 convVec(src_filter.lumH, id);
790 convVec(src_filter.lumV, id);
791 freeVec(g);
792 freeVec(id);
793 }
c7f822d9 794
6c7506de
MN
795 if(sws_chr_hshift)
796 shiftVec(src_filter.chrH, sws_chr_hshift);
c7f822d9 797
6c7506de
MN
798 if(sws_chr_vshift)
799 shiftVec(src_filter.chrV, sws_chr_vshift);
5521b193 800
6c7506de
MN
801 normalizeVec(src_filter.chrH, 1.0);
802 normalizeVec(src_filter.chrV, 1.0);
803 normalizeVec(src_filter.lumH, 1.0);
804 normalizeVec(src_filter.lumV, 1.0);
28bf81c9 805
6c7506de
MN
806 if(verbose > 1) printVec(src_filter.chrH);
807 if(verbose > 1) printVec(src_filter.lumH);
28bf81c9
MN
808
809 switch(sws_flags)
810 {
811 case 0: flags|= SWS_FAST_BILINEAR; break;
812 case 1: flags|= SWS_BILINEAR; break;
813 case 2: flags|= SWS_BICUBIC; break;
814 case 3: flags|= SWS_X; break;
ff7ba856 815 case 4: flags|= SWS_POINT; break;
d8863d37 816 case 5: flags|= SWS_AREA; break;
28bf81c9
MN
817 default:flags|= SWS_BILINEAR; break;
818 }
819
6c7506de 820 return getSwsContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, &src_filter, NULL);
28bf81c9
MN
821}
822
6c7506de 823
c7f822d9
MN
824static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
825 int srcW, int dstW, int filterAlign, int one, int flags,
826 SwsVector *srcFilter, SwsVector *dstFilter)
28bf81c9
MN
827{
828 int i;
c7f822d9
MN
829 int filterSize;
830 int filter2Size;
831 int minFilterSize;
832 double *filter=NULL;
833 double *filter2=NULL;
28bf81c9
MN
834#ifdef ARCH_X86
835 if(gCpuCaps.hasMMX)
836 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
726a959a 837#endif
31190492 838
adeaecb9 839 // Note the +1 is for the MMXscaler which reads over the end
6c7506de 840 *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));
6c7506de 841
28bf81c9
MN
842 if(ABS(xInc - 0x10000) <10) // unscaled
843 {
844 int i;
c7f822d9
MN
845 filterSize= 1;
846 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
847 for(i=0; i<dstW*filterSize; i++) filter[i]=0;
28bf81c9
MN
848
849 for(i=0; i<dstW; i++)
850 {
c7f822d9
MN
851 filter[i*filterSize]=1;
852 (*filterPos)[i]=i;
28bf81c9
MN
853 }
854
855 }
ff7ba856
MN
856 else if(flags&SWS_POINT) // lame looking point sampling mode
857 {
858 int i;
859 int xDstInSrc;
860 filterSize= 1;
861 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
862
863 xDstInSrc= xInc/2 - 0x8000;
864 for(i=0; i<dstW; i++)
865 {
8a01d20c 866 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
ff7ba856
MN
867
868 (*filterPos)[i]= xx;
869 filter[i]= 1.0;
870 xDstInSrc+= xInc;
871 }
872 }
28bf81c9
MN
873 else if(xInc <= (1<<16) || (flags&SWS_FAST_BILINEAR)) // upscale
874 {
875 int i;
876 int xDstInSrc;
c7f822d9
MN
877 if (flags&SWS_BICUBIC) filterSize= 4;
878 else if(flags&SWS_X ) filterSize= 4;
d8863d37 879 else filterSize= 2; // SWS_BILINEAR / SWS_AREA
c7f822d9 880 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
28bf81c9
MN
881
882 xDstInSrc= xInc/2 - 0x8000;
883 for(i=0; i<dstW; i++)
884 {
8a01d20c 885 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
28bf81c9
MN
886 int j;
887
c7f822d9 888 (*filterPos)[i]= xx;
28bf81c9
MN
889 if((flags & SWS_BICUBIC) || (flags & SWS_X))
890 {
891 double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16);
892 double y1,y2,y3,y4;
893 double A= -0.6;
894 if(flags & SWS_BICUBIC){
895 // Equation is from VirtualDub
896 y1 = ( + A*d - 2.0*A*d*d + A*d*d*d);
897 y2 = (+ 1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d);
898 y3 = ( - A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d);
899 y4 = ( + A*d*d - A*d*d*d);
900 }else{
901 // cubic interpolation (derived it myself)
902 y1 = ( -2.0*d + 3.0*d*d - 1.0*d*d*d)/6.0;
903 y2 = (6.0 -3.0*d - 6.0*d*d + 3.0*d*d*d)/6.0;
904 y3 = ( +6.0*d + 3.0*d*d - 3.0*d*d*d)/6.0;
905 y4 = ( -1.0*d + 1.0*d*d*d)/6.0;
906 }
907
c7f822d9
MN
908 filter[i*filterSize + 0]= y1;
909 filter[i*filterSize + 1]= y2;
910 filter[i*filterSize + 2]= y3;
911 filter[i*filterSize + 3]= y4;
28bf81c9
MN
912 }
913 else
914 {
d8863d37 915 //Bilinear upscale / linear interpolate / Area averaging
c7f822d9 916 for(j=0; j<filterSize; j++)
28bf81c9
MN
917 {
918 double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
919 double coeff= 1.0 - d;
920 if(coeff<0) coeff=0;
c7f822d9 921 filter[i*filterSize + j]= coeff;
28bf81c9
MN
922 xx++;
923 }
924 }
925 xDstInSrc+= xInc;
926 }
927 }
928 else // downscale
929 {
930 int xDstInSrc;
81b7c056
MN
931 ASSERT(dstW <= srcW)
932
d8863d37
MN
933 if(flags&SWS_BICUBIC) filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
934 else if(flags&SWS_X) filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
935 else if(flags&SWS_AREA) filterSize= (int)ceil(1 + 1.0*srcW / (double)dstW);
936 else /* BILINEAR */ filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW);
c7f822d9 937 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
28bf81c9
MN
938
939 xDstInSrc= xInc/2 - 0x8000;
940 for(i=0; i<dstW; i++)
941 {
c7f822d9 942 int xx= (int)((double)xDstInSrc/(double)(1<<16) - (filterSize-1)*0.5 + 0.5);
28bf81c9 943 int j;
c7f822d9
MN
944 (*filterPos)[i]= xx;
945 for(j=0; j<filterSize; j++)
28bf81c9
MN
946 {
947 double d= ABS((xx<<16) - xDstInSrc)/(double)xInc;
948 double coeff;
949 if((flags & SWS_BICUBIC) || (flags & SWS_X))
950 {
951 double A= -0.75;
952// d*=2;
953 // Equation is from VirtualDub
954 if(d<1.0)
955 coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d);
956 else if(d<2.0)
957 coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d);
958 else
959 coeff=0.0;
960 }
d8863d37 961 else if(flags & SWS_AREA)
28bf81c9 962 {
d8863d37
MN
963 double srcPixelSize= (1<<16)/(double)xInc;
964 if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
965 else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
966 else coeff=0.0;
967 }
28bf81c9
MN
968 else
969 {
970 coeff= 1.0 - d;
971 if(coeff<0) coeff=0;
972 }
c7f822d9 973 filter[i*filterSize + j]= coeff;
28bf81c9
MN
974 xx++;
975 }
976 xDstInSrc+= xInc;
977 }
978 }
979
c7f822d9
MN
980 /* apply src & dst Filter to filter -> filter2
981 free(filter);
982 */
81b7c056 983 ASSERT(filterSize>0)
c7f822d9
MN
984 filter2Size= filterSize;
985 if(srcFilter) filter2Size+= srcFilter->length - 1;
986 if(dstFilter) filter2Size+= dstFilter->length - 1;
81b7c056 987 ASSERT(filter2Size>0)
c7f822d9
MN
988 filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));
989
990 for(i=0; i<dstW; i++)
991 {
992 int j;
993 SwsVector scaleFilter;
994 SwsVector *outVec;
995
996 scaleFilter.coeff= filter + i*filterSize;
997 scaleFilter.length= filterSize;
998
5cebb24b 999 if(srcFilter) outVec= getConvVec(srcFilter, &scaleFilter);
c7f822d9
MN
1000 else outVec= &scaleFilter;
1001
1002 ASSERT(outVec->length == filter2Size)
1003 //FIXME dstFilter
1004
1005 for(j=0; j<outVec->length; j++)
1006 {
1007 filter2[i*filter2Size + j]= outVec->coeff[j];
1008 }
1009
1010 (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
1011
1012 if(outVec != &scaleFilter) freeVec(outVec);
1013 }
1014 free(filter); filter=NULL;
1015
1016 /* try to reduce the filter-size (step1 find size and shift left) */
1017 // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
1018 minFilterSize= 0;
1019 for(i=dstW-1; i>=0; i--)
1020 {
1021 int min= filter2Size;
1022 int j;
1023 double cutOff=0.0;
1024
1025 /* get rid off near zero elements on the left by shifting left */
1026 for(j=0; j<filter2Size; j++)
1027 {
1028 int k;
1029 cutOff += ABS(filter2[i*filter2Size]);
1030
1031 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1032
1033 /* preserve Monotonicity because the core cant handle the filter otherwise */
1034 if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1035
1036 // Move filter coeffs left
1037 for(k=1; k<filter2Size; k++)
1038 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1039 filter2[i*filter2Size + k - 1]= 0.0;
1040 (*filterPos)[i]++;
1041 }
1042
1043 cutOff=0.0;
1044 /* count near zeros on the right */
1045 for(j=filter2Size-1; j>0; j--)
1046 {
1047 cutOff += ABS(filter2[i*filter2Size + j]);
1048
1049 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1050 min--;
1051 }
1052
1053 if(min>minFilterSize) minFilterSize= min;
1054 }
1055
81b7c056 1056 ASSERT(minFilterSize > 0)
6c7506de 1057 filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
81b7c056 1058 ASSERT(filterSize > 0)
6c7506de
MN
1059 filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
1060 *outFilterSize= filterSize;
1061
4a53a912 1062 if(flags&SWS_PRINT_INFO)
0d9f3d85 1063 MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
c7f822d9
MN
1064 /* try to reduce the filter-size (step2 reduce it) */
1065 for(i=0; i<dstW; i++)
1066 {
1067 int j;
1068
6c7506de
MN
1069 for(j=0; j<filterSize; j++)
1070 {
1071 if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
1072 else filter[i*filterSize + j]= filter2[i*filter2Size + j];
1073 }
c7f822d9 1074 }
6c7506de
MN
1075 free(filter2); filter2=NULL;
1076
c7f822d9
MN
1077
1078 //FIXME try to align filterpos if possible
1079
28bf81c9
MN
1080 //fix borders
1081 for(i=0; i<dstW; i++)
1082 {
1083 int j;
c7f822d9 1084 if((*filterPos)[i] < 0)
28bf81c9
MN
1085 {
1086 // Move filter coeffs left to compensate for filterPos
6c7506de 1087 for(j=1; j<filterSize; j++)
28bf81c9 1088 {
c7f822d9 1089 int left= MAX(j + (*filterPos)[i], 0);
6c7506de
MN
1090 filter[i*filterSize + left] += filter[i*filterSize + j];
1091 filter[i*filterSize + j]=0;
28bf81c9 1092 }
c7f822d9 1093 (*filterPos)[i]= 0;
28bf81c9
MN
1094 }
1095
6c7506de 1096 if((*filterPos)[i] + filterSize > srcW)
28bf81c9 1097 {
6c7506de 1098 int shift= (*filterPos)[i] + filterSize - srcW;
28bf81c9 1099 // Move filter coeffs right to compensate for filterPos
6c7506de 1100 for(j=filterSize-2; j>=0; j--)
28bf81c9 1101 {
6c7506de
MN
1102 int right= MIN(j + shift, filterSize-1);
1103 filter[i*filterSize +right] += filter[i*filterSize +j];
1104 filter[i*filterSize +j]=0;
28bf81c9 1105 }
6c7506de 1106 (*filterPos)[i]= srcW - filterSize;
28bf81c9
MN
1107 }
1108 }
1109
6c7506de
MN
1110 // Note the +1 is for the MMXscaler which reads over the end
1111 *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
1112 memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
c7f822d9
MN
1113
1114 /* Normalize & Store in outFilter */
28bf81c9
MN
1115 for(i=0; i<dstW; i++)
1116 {
1117 int j;
1118 double sum=0;
1119 double scale= one;
6c7506de 1120 for(j=0; j<filterSize; j++)
28bf81c9 1121 {
6c7506de 1122 sum+= filter[i*filterSize + j];
28bf81c9
MN
1123 }
1124 scale/= sum;
6c7506de 1125 for(j=0; j<filterSize; j++)
28bf81c9 1126 {
6c7506de 1127 (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
28bf81c9
MN
1128 }
1129 }
adeaecb9
MN
1130
1131 (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
1132 for(i=0; i<*outFilterSize; i++)
1133 {
1134 int j= dstW*(*outFilterSize);
1135 (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
1136 }
c7f822d9 1137
6c7506de 1138 free(filter);
7630f2e0 1139}
31190492 1140
28bf81c9 1141#ifdef ARCH_X86
/*
 * Generates the MMX2 horizontal scaling code.
 * For every group of 4 output pixels one of two machine code fragments
 * (A or B, captured below through inline asm) is copied into funnyCode and the
 * immediate bytes of its two pshufw instructions are patched.
 * Also fills filter[] (one weight per output pixel) and filterPos[] (one source
 * position per group of 4 pixels, stored at index i/2, plus one extra entry
 * written after the loop).
 * Only dstW/numSplits output pixels are processed.
 */
b7dc6f66 1142static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
28bf81c9 1143{
b7dc6f66
MN
1144 uint8_t *fragmentA;
1145 int imm8OfPShufW1A;
1146 int imm8OfPShufW2A;
1147 int fragmentLengthA;
1148 uint8_t *fragmentB;
1149 int imm8OfPShufW1B;
1150 int imm8OfPShufW2B;
1151 int fragmentLengthB;
1152 int fragmentPos;
28bf81c9
MN
1153
1154 int xpos, i;
1155
1156 // create an optimized horizontal scaling routine
1157
1158 //code fragment
1159
// Fragment A: loads source bytes from (%ecx,%esi) and from 1(%ecx,%esi) separately.
// The leading "jmp 9f" skips the fragment, so it is not executed here; the code
// after label 9 only yields the fragment start address (%0), the offsets of the
// bytes immediately preceding labels 1 and 2 relative to the start (%1, %2) and
// the fragment length (%3).
1160 asm volatile(
1161 "jmp 9f \n\t"
1162 // Begin
1163 "0: \n\t"
b7dc6f66
MN
1164 "movq (%%edx, %%eax), %%mm3 \n\t"
1165 "movd (%%ecx, %%esi), %%mm0 \n\t"
1166 "movd 1(%%ecx, %%esi), %%mm1 \n\t"
1167 "punpcklbw %%mm7, %%mm1 \n\t"
1168 "punpcklbw %%mm7, %%mm0 \n\t"
28bf81c9
MN
1169 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
1170 "1: \n\t"
28bf81c9
MN
1171 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
1172 "2: \n\t"
28bf81c9 1173 "psubw %%mm1, %%mm0 \n\t"
b7dc6f66 1174 "movl 8(%%ebx, %%eax), %%esi \n\t"
28bf81c9 1175 "pmullw %%mm3, %%mm0 \n\t"
28bf81c9
MN
1176 "psllw $7, %%mm1 \n\t"
1177 "paddw %%mm1, %%mm0 \n\t"
1178
1179 "movq %%mm0, (%%edi, %%eax) \n\t"
1180
1181 "addl $8, %%eax \n\t"
1182 // End
1183 "9: \n\t"
1184// "int $3\n\t"
1185 "leal 0b, %0 \n\t"
1186 "leal 1b, %1 \n\t"
1187 "leal 2b, %2 \n\t"
1188 "decl %1 \n\t"
1189 "decl %2 \n\t"
1190 "subl %0, %1 \n\t"
1191 "subl %0, %2 \n\t"
1192 "leal 9b, %3 \n\t"
1193 "subl %0, %3 \n\t"
b7dc6f66
MN
1194
1195
1196 :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1197 "=r" (fragmentLengthA)
28bf81c9
MN
1198 );
1199
// Fragment B: same structure as fragment A, but with a single movd load;
// mm1 is produced from mm0 by the first pshufw instead of a second load.
b7dc6f66
MN
1200 asm volatile(
1201 "jmp 9f \n\t"
1202 // Begin
1203 "0: \n\t"
1204 "movq (%%edx, %%eax), %%mm3 \n\t"
1205 "movd (%%ecx, %%esi), %%mm0 \n\t"
1206 "punpcklbw %%mm7, %%mm0 \n\t"
1207 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
1208 "1: \n\t"
1209 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
1210 "2: \n\t"
1211 "psubw %%mm1, %%mm0 \n\t"
1212 "movl 8(%%ebx, %%eax), %%esi \n\t"
1213 "pmullw %%mm3, %%mm0 \n\t"
1214 "psllw $7, %%mm1 \n\t"
1215 "paddw %%mm1, %%mm0 \n\t"
1216
1217 "movq %%mm0, (%%edi, %%eax) \n\t"
28bf81c9 1218
b7dc6f66
MN
1219 "addl $8, %%eax \n\t"
1220 // End
1221 "9: \n\t"
1222// "int $3\n\t"
1223 "leal 0b, %0 \n\t"
1224 "leal 1b, %1 \n\t"
1225 "leal 2b, %2 \n\t"
1226 "decl %1 \n\t"
1227 "decl %2 \n\t"
1228 "subl %0, %1 \n\t"
1229 "subl %0, %2 \n\t"
1230 "leal 9b, %3 \n\t"
1231 "subl %0, %3 \n\t"
1232
1233
1234 :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1235 "=r" (fragmentLengthB)
1236 );
1237
// xpos is the source position in 16.16 fixed point, advanced by xInc per output pixel
1238 xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1239 fragmentPos=0;
1240
1241 for(i=0; i<dstW/numSplits; i++)
28bf81c9
MN
1242 {
// integer part of the current source position
1243 int xx=xpos>>16;
1244
// one code fragment is emitted per group of 4 output pixels
1245 if((i&3) == 0)
1246 {
// a..d: integer source offsets of the 4 output pixels of this group relative to xx
1247 int a=0;
1248 int b=((xpos+xInc)>>16) - xx;
1249 int c=((xpos+xInc*2)>>16) - xx;
1250 int d=((xpos+xInc*3)>>16) - xx;
1251
// weights: inverted 16 bit fractional part of each position, reduced to 7 bits
b7dc6f66
MN
1252 filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
1253 filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
1254 filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1255 filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1256 filterPos[i/2]= xx;
1257
// d+1 < 4 selects the single-load fragment B
// (presumably because all 4 pixels and their right neighbours then fit into one 4 byte load -- TODO confirm)
1258 if(d+1<4)
1259 {
1260 int maxShift= 3-(d+1);
1261 int shift=0;
1262
1263 memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1264
// patch the two pshufw immediates: first one selects the right neighbours (offset+1), second one the pixels themselves
1265 funnyCode[fragmentPos + imm8OfPShufW1B]=
1266 (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1267 funnyCode[fragmentPos + imm8OfPShufW2B]=
1268 a | (b<<2) | (c<<4) | (d<<6);
1269
1270 if(i+3>=dstW) shift=maxShift; //avoid overread
1271 else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1272
// adding 0x55*shift increases each of the four 2 bit selectors by shift, compensating the lowered filterPos
1273 if(shift && i>=shift)
1274 {
1275 funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1276 funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1277 filterPos[i/2]-=shift;
1278 }
1279
1280 fragmentPos+= fragmentLengthB;
1281 }
1282 else
1283 {
1284 int maxShift= 3-d;
1285 int shift=0;
1286
1287 memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
28bf81c9 1288
// fragment A uses the same selector byte for both pshufw instructions
b7dc6f66
MN
1289 funnyCode[fragmentPos + imm8OfPShufW1A]=
1290 funnyCode[fragmentPos + imm8OfPShufW2A]=
1291 a | (b<<2) | (c<<4) | (d<<6);
28bf81c9 1292
b7dc6f66
MN
1293 if(i+4>=dstW) shift=maxShift; //avoid overread
1294 else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
28bf81c9 1295
b7dc6f66
MN
1296 if(shift && i>=shift)
1297 {
1298 funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1299 funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1300 filterPos[i/2]-=shift;
1301 }
1302
1303 fragmentPos+= fragmentLengthA;
1304 }
1305
// RET is written without advancing fragmentPos, so the next fragment (if any) overwrites it
1306 funnyCode[fragmentPos]= RET;
28bf81c9
MN
1307 }
1308 xpos+=xInc;
1309 }
b7dc6f66 1310 filterPos[i/2]= xpos>>16; // needed to jump to the next part
28bf81c9
MN
1311}
1312#endif // ARCH_X86
1313
1314//FIXME remove
/* Intentionally empty: performs no initialisation. */
void SwScale_Init()
{
}
1317
1318static void globalInit(){
31190492
A
1319 // generating tables:
1320 int i;
c1b0bfb4
MN
1321 for(i=0; i<768; i++){
1322 int c= MIN(MAX(i-256, 0), 255);
1323 clip_table[i]=c;
1324 yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13);
1325 yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128);
1326 yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128);
1327 yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128);
1328 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128);
31190492
A
1329 }
1330
b18ea156
MN
1331 for(i=0; i<768; i++)
1332 {
28bf81c9 1333 int v= clip_table[i];
daa57641
MN
1334 clip_table16b[i]= v>>3;
1335 clip_table16g[i]= (v<<3)&0x07E0;
1336 clip_table16r[i]= (v<<8)&0xF800;
1337 clip_table15b[i]= v>>3;
1338 clip_table15g[i]= (v<<2)&0x03E0;
1339 clip_table15r[i]= (v<<7)&0x7C00;
b18ea156 1340 }
c1b0bfb4 1341
28bf81c9
MN
1342cpuCaps= gCpuCaps;
1343
1344#ifdef RUNTIME_CPUDETECT
1345#ifdef CAN_COMPILE_X86_ASM
1346 // ordered per speed fasterst first
1347 if(gCpuCaps.hasMMX2)
1348 swScale= swScale_MMX2;
1349 else if(gCpuCaps.has3DNow)
7f56a527 1350 swScale= swScale_3DNow;
28bf81c9
MN
1351 else if(gCpuCaps.hasMMX)
1352 swScale= swScale_MMX;
1353 else
1354 swScale= swScale_C;
1355
1356#else
1357 swScale= swScale_C;
1358 cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
1359#endif
1360#else //RUNTIME_CPUDETECT
1361#ifdef HAVE_MMX2
1362 swScale= swScale_MMX2;
1363 cpuCaps.has3DNow = 0;
1364#elif defined (HAVE_3DNOW)
7f56a527 1365 swScale= swScale_3DNow;
28bf81c9
MN
1366 cpuCaps.hasMMX2 = 0;
1367#elif defined (HAVE_MMX)
1368 swScale= swScale_MMX;
1369 cpuCaps.hasMMX2 = cpuCaps.has3DNow = 0;
1370#else
1371 swScale= swScale_C;
1372 cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
1373#endif
1374#endif //!RUNTIME_CPUDETECT
31190492 1375}
7630f2e0 1376
0d9f3d85
A
1377static void PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1378 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1379 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1380 /* Copy Y plane */
1381 if(dstStride[0]==srcStride[0])
1382 memcpy(dst, src[0], srcSliceH*dstStride[0]);
1383 else
1384 {
1385 int i;
1386 uint8_t *srcPtr= src[0];
1387 uint8_t *dstPtr= dst;
1388 for(i=0; i<srcSliceH; i++)
1389 {
1390 memcpy(dstPtr, srcPtr, srcStride[0]);
1391 srcPtr+= srcStride[0];
1392 dstPtr+= dstStride[0];
1393 }
1394 }
1395 dst = dstParam[1] + dstStride[1]*srcSliceY;
1396 if(c->srcFormat==IMGFMT_YV12)
1397 interleaveBytes( src[1],src[2],dst,c->srcW,srcSliceH,srcStride[1],srcStride[2],dstStride[0] );
1398 else /* I420 & IYUV */
1399 interleaveBytes( src[2],src[1],dst,c->srcW,srcSliceH,srcStride[2],srcStride[1],dstStride[0] );
1400}
1401
1402
37079906
MN
1403/* Wrapper functions for yuv2bgr */
1404static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
b6654a54
MN
1405 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1406 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
37079906
MN
1407
1408 if(c->srcFormat==IMGFMT_YV12)
b6654a54 1409 yuv2rgb( dst,src[0],src[1],src[2],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] );
37079906 1410 else /* I420 & IYUV */
b6654a54
MN
1411 yuv2rgb( dst,src[0],src[2],src[1],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] );
1412}
1413
44c1035c 1414static void PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
0d9f3d85
A
1415 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1416 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1417
1418 if(c->srcFormat==IMGFMT_YV12)
1419 yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
1420 else /* I420 & IYUV */
1421 yv12toyuy2( src[0],src[2],src[1],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
1422}
1423
b935781b
MN
1424static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1425 int srcSliceH, uint8_t* dst[], int dstStride[]){
1426
1427 if(dstStride[0]*3==srcStride[0]*4)
4bb3fa5e 1428 rgb24to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
b935781b
MN
1429 else
1430 {
1431 int i;
1432 uint8_t *srcPtr= src[0];
1433 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1434
1435 for(i=0; i<srcSliceH; i++)
1436 {
4bb3fa5e 1437 rgb24to32(srcPtr, dstPtr, c->srcW*3);
b935781b
MN
1438 srcPtr+= srcStride[0];
1439 dstPtr+= dstStride[0];
1440 }
1441 }
1442}
1443
0d9f3d85
A
1444static void bgr24to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1445 int srcSliceH, uint8_t* dst[], int dstStride[]){
1446
1447 if(dstStride[0]*3==srcStride[0]*2)
1448 rgb24to16(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1449 else
1450 {
1451 int i;
1452 uint8_t *srcPtr= src[0];
1453 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1454
1455 for(i=0; i<srcSliceH; i++)
1456 {
1457 rgb24to16(srcPtr, dstPtr, c->srcW*3);
1458 srcPtr+= srcStride[0];
1459 dstPtr+= dstStride[0];
1460 }
1461 }
1462}
1463
1464static void bgr24to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1465 int srcSliceH, uint8_t* dst[], int dstStride[]){
1466
1467 if(dstStride[0]*3==srcStride[0]*2)
1468 rgb24to15(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1469 else
1470 {
1471 int i;
1472 uint8_t *srcPtr= src[0];
1473 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1474
1475 for(i=0; i<srcSliceH; i++)
1476 {
1477 rgb24to15(srcPtr, dstPtr, c->srcW*3);
1478 srcPtr+= srcStride[0];
1479 dstPtr+= dstStride[0];
1480 }
1481 }
1482}
1483
b935781b
MN
1484static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1485 int srcSliceH, uint8_t* dst[], int dstStride[]){
1486
1487 if(dstStride[0]*4==srcStride[0]*3)
4bb3fa5e 1488 rgb32to24(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
b935781b
MN
1489 else
1490 {
1491 int i;
1492 uint8_t *srcPtr= src[0];
1493 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1494
1495 for(i=0; i<srcSliceH; i++)
1496 {
4bb3fa5e
MN
1497 rgb32to24(srcPtr, dstPtr, c->srcW<<2);
1498 srcPtr+= srcStride[0];
1499 dstPtr+= dstStride[0];
1500 }
1501 }
1502}
1503
0d9f3d85
A
1504static void bgr32to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1505 int srcSliceH, uint8_t* dst[], int dstStride[]){
1506
1507 if(dstStride[0]*4==srcStride[0]*2)
1508 rgb32to16(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1509 else
1510 {
1511 int i;
1512 uint8_t *srcPtr= src[0];
1513 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1514
1515 for(i=0; i<srcSliceH; i++)
1516 {
1517 rgb32to16(srcPtr, dstPtr, c->srcW<<2);
1518 srcPtr+= srcStride[0];
1519 dstPtr+= dstStride[0];
1520 }
1521 }
1522}
1523
1524static void bgr32to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1525 int srcSliceH, uint8_t* dst[], int dstStride[]){
1526
1527 if(dstStride[0]*4==srcStride[0]*2)
1528 rgb32to15(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1529 else
1530 {
1531 int i;
1532 uint8_t *srcPtr= src[0];
1533 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1534
1535 for(i=0; i<srcSliceH; i++)
1536 {
1537 rgb32to15(srcPtr, dstPtr, c->srcW<<2);
1538 srcPtr+= srcStride[0];
1539 dstPtr+= dstStride[0];
1540 }
1541 }
1542}
1543
4bb3fa5e
MN
1544static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1545 int srcSliceH, uint8_t* dst[], int dstStride[]){
1546
1547 if(dstStride[0]==srcStride[0])
1548 rgb15to16(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1549 else
1550 {
1551 int i;
1552 uint8_t *srcPtr= src[0];
1553 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1554
1555 for(i=0; i<srcSliceH; i++)
1556 {
1557 rgb15to16(srcPtr, dstPtr, c->srcW<<1);
b935781b
MN
1558 srcPtr+= srcStride[0];
1559 dstPtr+= dstStride[0];
1560 }
1561 }
1562}
1563
0d9f3d85
A
1564static void bgr15to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1565 int srcSliceH, uint8_t* dst[], int dstStride[]){
1566
1567 if(dstStride[0]*2==srcStride[0]*3)
1568 rgb15to24(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1569 else
1570 {
1571 int i;
1572 uint8_t *srcPtr= src[0];
1573 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1574
1575 for(i=0; i<srcSliceH; i++)
1576 {
1577 rgb15to24(srcPtr, dstPtr, c->srcW<<1);
1578 srcPtr+= srcStride[0];
1579 dstPtr+= dstStride[0];
1580 }
1581 }
1582}
1583
1584static void bgr15to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1585 int srcSliceH, uint8_t* dst[], int dstStride[]){
1586
1587 if(dstStride[0]*2==srcStride[0]*4)
1588 rgb15to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1589 else
1590 {
1591 int i;
1592 uint8_t *srcPtr= src[0];
1593 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1594
1595 for(i=0; i<srcSliceH; i++)
1596 {
1597 rgb15to32(srcPtr, dstPtr, c->srcW<<1);
1598 srcPtr+= srcStride[0];
1599 dstPtr+= dstStride[0];
1600 }
1601 }
1602}
1603
1604static void bgr16to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1605 int srcSliceH, uint8_t* dst[], int dstStride[]){
1606
1607 if(dstStride[0]*2==srcStride[0]*3)
1608 rgb16to24(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1609 else
1610 {
1611 int i;
1612 uint8_t *srcPtr= src[0];
1613 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1614
1615 for(i=0; i<srcSliceH; i++)
1616 {
1617 rgb16to24(srcPtr, dstPtr, c->srcW<<1);
1618 srcPtr+= srcStride[0];
1619 dstPtr+= dstStride[0];
1620 }
1621 }
1622}
1623
1624static void bgr16to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1625 int srcSliceH, uint8_t* dst[], int dstStride[]){
1626
1627 if(dstStride[0]*2==srcStride[0]*4)
1628 rgb16to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1629 else
1630 {
1631 int i;
1632 uint8_t *srcPtr= src[0];
1633 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1634
1635 for(i=0; i<srcSliceH; i++)
1636 {
1637 rgb16to32(srcPtr, dstPtr, c->srcW<<1);
1638 srcPtr+= srcStride[0];
1639 dstPtr+= dstStride[0];
1640 }
1641 }
1642}
1643
ec22603f
MN
1644static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1645 int srcSliceH, uint8_t* dst[], int dstStride[]){
1646
1647 rgb24toyv12(
1648 src[0],
1649 dst[0]+ srcSliceY *dstStride[0],
1650 dst[1]+(srcSliceY>>1)*dstStride[1],
1651 dst[2]+(srcSliceY>>1)*dstStride[2],
1652 c->srcW, srcSliceH,
1653 dstStride[0], dstStride[1], srcStride[0]);
1654}
1655
44c1035c
MN
1656/**
1657 * bring pointers in YUV order instead of YVU
1658 */
c7a810cc
MN
1659static inline void orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
1660 if(format == IMGFMT_YV12){
44c1035c
MN
1661 sortedP[0]= p[0];
1662 sortedP[1]= p[1];
1663 sortedP[2]= p[2];
c7a810cc
MN
1664 sortedStride[0]= stride[0];
1665 sortedStride[1]= stride[1];
1666 sortedStride[2]= stride[2];
44c1035c 1667 }
c7a810cc 1668 else if(isPacked(format) || isGray(format))
44c1035c
MN
1669 {
1670 sortedP[0]= p[0];
1671 sortedP[1]=
1672 sortedP[2]= NULL;
c7a810cc 1673 sortedStride[0]= stride[0];
44c1035c
MN
1674 sortedStride[1]=
1675 sortedStride[2]= 0;
1676 }
1677 else /* I420 */
1678 {
1679 sortedP[0]= p[0];
1680 sortedP[1]= p[2];
1681 sortedP[2]= p[1];
c7a810cc
MN
1682 sortedStride[0]= stride[0];
1683 sortedStride[1]= stride[2];
1684 sortedStride[2]= stride[1];
44c1035c
MN
1685 }
1686}
b935781b 1687
b6654a54
MN
1688/* unscaled copy like stuff (assumes nearly identical formats) */
1689static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
44c1035c 1690 int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
b6654a54
MN
1691
1692 int srcStride[3];
44c1035c 1693 int dstStride[3];
b6654a54
MN
1694 uint8_t *src[3];
1695 uint8_t *dst[3];
1696
c7a810cc
MN
1697 orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam);
1698 orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam);
b6654a54
MN
1699
1700 if(isPacked(c->srcFormat))
1701 {
1702 if(dstStride[0]==srcStride[0])
1703 memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1704 else
1705 {
1706 int i;
1707 uint8_t *srcPtr= src[0];
1708 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
a861d4d7
MN
1709 int length=0;
1710
1711 /* universal length finder */
9bd8bd1a
MN
1712 while(length+c->srcW <= ABS(dstStride[0])
1713 && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
a861d4d7 1714 ASSERT(length!=0);
b6654a54
MN
1715
1716 for(i=0; i<srcSliceH; i++)
1717 {
1718 memcpy(dstPtr, srcPtr, length);
1719 srcPtr+= srcStride[0];
1720 dstPtr+= dstStride[0];
1721 }
1722 }
1723 }
1724 else
44c1035c 1725 { /* Planar YUV or gray */
b6654a54
MN
1726 int plane;
1727 for(plane=0; plane<3; plane++)
1728 {
44c1035c
MN
1729 int length= plane==0 ? c->srcW : ((c->srcW+1)>>1);
1730 int y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1);
1731 int height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1);
1732
1733 if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
0d9f3d85 1734 {
44c1035c
MN
1735 if(!isGray(c->dstFormat))
1736 memset(dst[plane], 0, dstStride[plane]*height);
0d9f3d85 1737 }
b6654a54
MN
1738 else
1739 {
44c1035c
MN
1740 if(dstStride[plane]==srcStride[plane])
1741 memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1742 else
b6654a54 1743 {
44c1035c
MN
1744 int i;
1745 uint8_t *srcPtr= src[plane];
1746 uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1747 for(i=0; i<height; i++)
1748 {
1749 memcpy(dstPtr, srcPtr, length);
1750 srcPtr+= srcStride[plane];
1751 dstPtr+= dstStride[plane];
1752 }
b6654a54
MN
1753 }
1754 }
1755 }
1756 }
37079906 1757}
28bf81c9 1758
44c1035c 1759static int remove_dup_fourcc(int fourcc)
0d9f3d85
A
1760{
1761 switch(fourcc)
1762 {
1763 case IMGFMT_IYUV: return IMGFMT_I420;
1764 case IMGFMT_Y8 : return IMGFMT_Y800;
1765 default: return fourcc;
1766 }
1767}
1768
c7a810cc
MN
1769static void getSubSampleFactors(int *h, int *v, int format){
1770 switch(format){
1771 case IMGFMT_YUY2:
1772 *h=1;
1773 *v=0;
1774 break;
1775 case IMGFMT_YV12:
1776 case IMGFMT_I420:
1777 *h=1;
1778 *v=1;
1779 break;
1780 case IMGFMT_YVU9:
1781 *h=2;
1782 *v=2;
1783 break;
1784 default:
1785 *h=0;
1786 *v=0;
1787 break;
1788 }
1789}
1790
28bf81c9
MN
1791SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
1792 SwsFilter *srcFilter, SwsFilter *dstFilter){
1793
28bf81c9
MN
1794 SwsContext *c;
1795 int i;
37079906 1796 int usesFilter;
44c1035c 1797 int unscaled;
c7f822d9 1798 SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
5cebb24b
MN
1799#ifdef ARCH_X86
1800 if(gCpuCaps.hasMMX)
1801 asm volatile("emms\n\t"::: "memory");
1802#endif
28bf81c9
MN
1803 if(swScale==NULL) globalInit();
1804
6ff0ad6b 1805 /* avoid dupplicate Formats, so we dont need to check to much */
0d9f3d85
A
1806 srcFormat = remove_dup_fourcc(srcFormat);
1807 dstFormat = remove_dup_fourcc(dstFormat);
44c1035c
MN
1808
1809 unscaled = (srcW == dstW && srcH == dstH);
1810
1811 if(!isSupportedIn(srcFormat))
b81cf274 1812 {
44c1035c
MN
1813 MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
1814 return NULL;
1815 }
1816 if(!isSupportedOut(dstFormat))
1817 {
1818 MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
1819 return NULL;
b81cf274 1820 }
44c1035c 1821
28bf81c9 1822 /* sanity check */
b81cf274
MN
1823 if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
1824 {
0d9f3d85 1825 MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
b81cf274
MN
1826 srcW, srcH, dstW, dstH);
1827 return NULL;
1828 }
28bf81c9 1829
c7f822d9
MN
1830 if(!dstFilter) dstFilter= &dummyFilter;
1831 if(!srcFilter) srcFilter= &dummyFilter;
1832
28bf81c9 1833 c= memalign(64, sizeof(SwsContext));
c7f822d9 1834 memset(c, 0, sizeof(SwsContext));
28bf81c9
MN
1835
1836 c->srcW= srcW;
1837 c->srcH= srcH;
1838 c->dstW= dstW;
1839 c->dstH= dstH;
5521b193
MN
1840 c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
1841 c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
28bf81c9
MN
1842 c->flags= flags;
1843 c->dstFormat= dstFormat;
1844 c->srcFormat= srcFormat;
1845
37079906
MN
1846 usesFilter=0;
1847 if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
1848 if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesFilter=1;
1849 if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesFilter=1;
1850 if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesFilter=1;
1851 if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesFilter=1;
1852 if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesFilter=1;
1853 if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesFilter=1;
1854 if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesFilter=1;
1855
b935781b 1856 /* unscaled special Cases */
44c1035c 1857 if(unscaled && !usesFilter)
37079906 1858 {
0d9f3d85
A
1859 /* yv12_to_nv12 */
1860 if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_NV12)
1861 {
1862 c->swScale= PlanarToNV12Wrapper;
1863
1864 if(flags&SWS_PRINT_INFO)
1865 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
1866 vo_format_name(srcFormat), vo_format_name(dstFormat));
1867 return c;
1868 }
1869 /* yv12_to_yuy2 */
1870 if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_YUY2)
1871 {
44c1035c 1872 c->swScale= PlanarToYuy2Wrapper;
0d9f3d85
A
1873
1874 if(flags&SWS_PRINT_INFO)
1875 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
1876 vo_format_name(srcFormat), vo_format_name(dstFormat));
1877 return c;
1878 }
37079906
MN
1879 /* yuv2bgr */
1880 if(isPlanarYUV(srcFormat) && isBGR(dstFormat))
1881 {
1882 // FIXME multiple yuv2rgb converters wont work that way cuz that thing is full of globals&statics
1e1c4fe9 1883#ifdef WORDS_BIGENDIAN
daa57641
MN
1884 if(dstFormat==IMGFMT_BGR32)
1885 yuv2rgb_init( dstFormat&0xFF /* =bpp */, MODE_BGR);
1886 else
1887 yuv2rgb_init( dstFormat&0xFF /* =bpp */, MODE_RGB);
1e1c4fe9 1888#else
b6654a54 1889 yuv2rgb_init( dstFormat&0xFF /* =bpp */, MODE_RGB);
1e1c4fe9 1890#endif
37079906 1891 c->swScale= planarYuvToBgr;
b6654a54
MN
1892
1893 if(flags&SWS_PRINT_INFO)
0d9f3d85 1894 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
b6654a54
MN
1895 vo_format_name(srcFormat), vo_format_name(dstFormat));
1896 return c;
1897 }
1898
1899 /* simple copy */
44c1035c
MN
1900 if(srcFormat == dstFormat
1901 || ((isPlanarYUV(srcFormat)||isGray(srcFormat)) && (isPlanarYUV(dstFormat)||isGray(dstFormat))))
b6654a54
MN
1902 {
1903 c->swScale= simpleCopy;
1904
37079906 1905 if(flags&SWS_PRINT_INFO)
0d9f3d85 1906 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
37079906
MN
1907 vo_format_name(srcFormat), vo_format_name(dstFormat));
1908 return c;
1909 }
44c1035c 1910
b935781b
MN
1911 /* bgr32to24 & rgb32to24*/
1912 if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24)
1913 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24))
1914 {
1915 c->swScale= bgr32to24Wrapper;
1916
1917 if(flags&SWS_PRINT_INFO)
0d9f3d85
A
1918 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
1919 vo_format_name(srcFormat), vo_format_name(dstFormat));
1920 return c;
1921 }
1922
1923 /* bgr32to16 & rgb32to16*/
1924 if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR16)
1925 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB16))
1926 {
1927 c->swScale= bgr32to16Wrapper;
1928
1929 if(flags&SWS_PRINT_INFO)
1930 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
1931 vo_format_name(srcFormat), vo_format_name(dstFormat));
1932 return c;
1933 }
1934
1935 /* bgr32to15 & rgb32to15*/
1936 if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR15)
1937 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB15))
1938 {
1939 c->swScale= bgr32to15Wrapper;
1940
1941 if(flags&SWS_PRINT_INFO)
1942 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
b935781b
MN
1943 vo_format_name(srcFormat), vo_format_name(dstFormat));
1944 return c;
1945 }
1946
1947 /* bgr24to32 & rgb24to32*/
1948 if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR32)
1949 ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB32))
1950 {
1951 c->swScale= bgr24to32Wrapper;
1952
1953 if(flags&SWS_PRINT_INFO)
0d9f3d85
A
1954 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
1955 vo_format_name(srcFormat), vo_format_name(dstFormat));
1956 return c;
1957 }
1958
1959 /* bgr24to16 & rgb24to16*/
1960 if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR16)
1961 ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB16))
1962 {
1963 c->swScale= bgr24to16Wrapper;
1964
1965 if(flags&SWS_PRINT_INFO)
1966 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
1967 vo_format_name(srcFormat), vo_format_name(dstFormat));
1968 return c;
1969 }
1970
1971 /* bgr24to15 & rgb24to15*/
1972 if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR15)
1973 ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB15))
1974 {
1975 c->swScale= bgr24to15Wrapper;
1976
1977 if(flags&SWS_PRINT_INFO)
1978 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
b935781b
MN
1979 vo_format_name(srcFormat), vo_format_name(dstFormat));
1980 return c;
1981 }
4bb3fa5e
MN
1982
1983 /* bgr15to16 */
1984 if(srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR16)
1985 {
1986 c->swScale= bgr15to16Wrapper;
1987
1988 if(flags&SWS_PRINT_INFO)
0d9f3d85
A
1989 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
1990 vo_format_name(srcFormat), vo_format_name(dstFormat));
1991 return c;
1992 }
1993
1994 /* bgr15to24 */
1995 if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR24)
1996 ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB24))
1997 {
1998 c->swScale= bgr15to24Wrapper;
1999
2000 if(flags&SWS_PRINT_INFO)
2001 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
2002 vo_format_name(srcFormat), vo_format_name(dstFormat));
2003 return c;
2004 }
2005
2006 /* bgr15to32 */
2007 if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR32)
2008 ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB32))
2009 {
2010 c->swScale= bgr15to32Wrapper;
2011
2012 if(flags&SWS_PRINT_INFO)
2013 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
2014 vo_format_name(srcFormat), vo_format_name(dstFormat));
2015 return c;
2016 }
2017
2018 /* bgr16to24 */
2019 if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR24)
2020 ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB24))
2021 {
2022 c->swScale= bgr16to24Wrapper;
2023
2024 if(flags&SWS_PRINT_INFO)
2025 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
2026 vo_format_name(srcFormat), vo_format_name(dstFormat));
2027 return c;
2028 }
2029
2030 /* bgr16to32 */
2031 if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR32)
2032 ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB32))
2033 {
2034 c->swScale= bgr16to32Wrapper;
2035
2036 if(flags&SWS_PRINT_INFO)
2037 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
4bb3fa5e
MN
2038 vo_format_name(srcFormat), vo_format_name(dstFormat));
2039 return c;
2040 }
ec22603f
MN
2041
2042 /* bgr24toYV12 */
2043 if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
2044 {
2045 c->swScale= bgr24toyv12Wrapper;
2046
2047 if(flags&SWS_PRINT_INFO)
0d9f3d85 2048 MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
ec22603f
MN
2049 vo_format_name(srcFormat), vo_format_name(dstFormat));
2050 return c;
2051 }
37079906
MN
2052 }
2053
28bf81c9
MN
2054 if(cpuCaps.hasMMX2)
2055 {
2056 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2057 if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2058 {
2059 if(flags&SWS_PRINT_INFO)
0d9f3d85 2060 MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
28bf81c9
MN
2061 }
2062 }
2063 else
2064 c->canMMX2BeUsed=0;
2065
c7a810cc
MN
2066 getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2067 getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
1e621b18 2068
c7a810cc
MN
2069 // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
2070 if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
1e621b18 2071
c7a810cc
MN
2072 // drop eery 2. pixel for chroma calculation unless user wants full chroma
2073 if((isBGR(srcFormat) || isRGB(srcFormat) || srcFormat==IMGFMT_YUY2) && !(flags&SWS_FULL_CHR_V))
2074 c->chrSrcVSubSample=1;
1e621b18 2075
c7a810cc
MN
2076 // drop eery 2. pixel for chroma calculation unless user wants full chroma
2077 if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
2078 c->chrSrcHSubSample=1;
1e621b18 2079
c7a810cc
MN
2080 c->chrIntHSubSample= c->chrDstHSubSample;
2081 c->chrIntVSubSample= c->chrSrcVSubSample;
2082
2083 // note the -((-x)>>y) is so that we allways round toward +inf
2084 c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2085 c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2086 c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2087 c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2088/* printf("%d %d %d %d / %d %d %d %d //\n",
2089 c->chrSrcW,
2090c->chrSrcH,
2091c->chrDstW,
2092c->chrDstH,
2093srcW,
2094srcH,
2095dstW,
2096dstH);*/
1e621b18
MN
2097 c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2098 c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2099
28bf81c9
MN
2100 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2101 // but only for the FAST_BILINEAR mode otherwise do correct scaling
2102 // n-2 is the last chrominance sample available
2103 // this is not perfect, but noone shuld notice the difference, the more correct variant
2104 // would be like the vertical one, but that would require some special code for the
2105 // first and last pixel
2106 if(flags&SWS_FAST_BILINEAR)
2107 {
1e621b18
MN
2108 if(c->canMMX2BeUsed)
2109 {
2110 c->lumXInc+= 20;
2111 c->chrXInc+= 20;
2112 }
28bf81c9 2113 //we dont use the x86asm scaler if mmx is available
1e621b18
MN
2114 else if(cpuCaps.hasMMX)
2115 {
2116 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2117 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2118 }
28bf81c9
MN
2119 }
2120
28bf81c9
MN
2121 /* precalculate horizontal scaler filter coefficients */
2122 {
2123 const int filterAlign= cpuCaps.hasMMX ? 4 : 1;
2124
c7f822d9
MN
2125 initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2126 srcW , dstW, filterAlign, 1<<14, flags,
2127 srcFilter->lumH, dstFilter->lumH);
2128 initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2129 (srcW+1)>>1, c->chrDstW, filterAlign, 1<<14, flags,
2130 srcFilter->chrH, dstFilter->chrH);
28bf81c9
MN
2131
2132#ifdef ARCH_X86
2133// cant downscale !!!
2134 if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2135 {
b7dc6f66
MN
2136 c->lumMmx2Filter = (int16_t*)memalign(8, (dstW /8+8)*sizeof(int16_t));
2137 c->chrMmx2Filter = (int16_t*)memalign(8, (c->chrDstW /4+8)*sizeof(int16_t));
2138 c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW /2/8+8)*sizeof(int32_t));
2139 c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));
2140
2141 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2142 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
28bf81c9
MN
2143 }
2144#endif
2145 } // Init Horizontal stuff
2146
2147
2148
2149 /* precalculate vertical scaler filter coefficients */
c7f822d9
MN
2150 initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2151 srcH , dstH, 1, (1<<12)-4, flags,
2152 srcFilter->lumV, dstFilter->lumV);
2153 initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2154 (srcH+1)>>1, c->chrDstH, 1, (1<<12)-4, flags,
2155 srcFilter->chrV, dstFilter->chrV);
28bf81c9
MN
2156
2157 // Calculate Buffer Sizes so that they wont run out while handling these damn slices
2158 c->vLumBufSize= c->vLumFilterSize;
2159 c->vChrBufSize= c->vChrFilterSize;
2160 for(i=0; i<dstH; i++)
2161 {
2162 int chrI= i*c->chrDstH / dstH;
2163 int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
2164 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<1));
2165 nextSlice&= ~1; // Slices start at even boundaries
2166 if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
2167 c->vLumBufSize= nextSlice - c->vLumFilterPos[i ];
2168 if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>1))
2169 c->vChrBufSize= (nextSlice>>1) - c->vChrFilterPos[chrI];
2170 }
2171
2172 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
c7f822d9
MN
2173 c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
2174 c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
6c7506de 2175 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
28bf81c9
MN
2176 for(i=0; i<c->vLumBufSize; i++)
2177 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
2178 for(i=0; i<c->vChrBufSize; i++)
2179 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
2180
2181 //try to avoid drawing green stuff between the right end and the stride end
2182 for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
2183 for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2184
2185 ASSERT(c->chrDstH <= dstH)
28bf81c9
MN
2186
2187 // pack filter data for mmx code
2188 if(cpuCaps.hasMMX)
2189 {
c7f822d9
MN
2190 c->lumMmxFilter= (int16_t*)memalign(8, c->vLumFilterSize* dstH*4*sizeof(int16_t));
2191 c->chrMmxFilter= (int16_t*)memalign(8, c->vChrFilterSize*c->chrDstH*4*sizeof(int16_t));
28bf81c9
MN
2192 for(i=0; i<c->vLumFilterSize*dstH; i++)
2193 c->lumMmxFilter[4*i]=c->lumMmxFilter[4*i+1]=c->lumMmxFilter[4*i+2]=c->lumMmxFilter[4*i+3]=
2194 c->vLumFilter[i];
2195 for(i=0; i<c->vChrFilterSize*c->chrDstH; i++)
2196 c->chrMmxFilter[4*i]=c->chrMmxFilter[4*i+1]=c->chrMmxFilter[4*i+2]=c->chrMmxFilter[4*i+3]=
2197 c->vChrFilter[i];
2198 }
2199
2200 if(flags&SWS_PRINT_INFO)
2201 {
2202#ifdef DITHER1XBPP
5521b193
MN
2203 char *dither= " dithered";
2204#else
2205 char *dither= "";
28bf81c9
MN
2206#endif
2207 if(flags&SWS_FAST_BILINEAR)
0d9f3d85 2208 MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
28bf81c9 2209 else if(flags&SWS_BILINEAR)
0d9f3d85 2210 MSG_INFO("\nSwScaler: BILINEAR scaler, ");
28bf81c9 2211 else if(flags&SWS_BICUBIC)
0d9f3d85 2212 MSG_INFO("\nSwScaler: BICUBIC scaler, ");
1e621b18 2213 else if(flags&SWS_X)
0d9f3d85 2214 MSG_INFO("\nSwScaler: Experimental scaler, ");
ff7ba856 2215 else if(flags&SWS_POINT)
0d9f3d85 2216 MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
d8863d37 2217 else if(flags&SWS_AREA)
0d9f3d85 2218 MSG_INFO("\nSwScaler: Area Averageing scaler, ");
28bf81c9 2219 else
0d9f3d85 2220 MSG_INFO("\nSwScaler: ehh flags invalid?! ");
28bf81c9 2221
0d9f3d85
A
2222 if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
2223 MSG_INFO("from %s to%s %s ",
2224 vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
2225 else
2226 MSG_INFO("from %s to %s ",
2227 vo_format_name(srcFormat), vo_format_name(dstFormat));
28bf81c9
MN
2228
2229 if(cpuCaps.hasMMX2)
0d9f3d85 2230 MSG_INFO("using MMX2\n");
28bf81c9 2231 else if(cpuCaps.has3DNow)
0d9f3d85 2232 MSG_INFO("using 3DNOW\n");
28bf81c9 2233 else if(cpuCaps.hasMMX)
0d9f3d85 2234 MSG_INFO("using MMX\n");
28bf81c9 2235 else
0d9f3d85 2236 MSG_INFO("using C\n");
28bf81c9
MN
2237 }
2238
2239 if((flags & SWS_PRINT_INFO) && verbose)
2240 {
2241 if(cpuCaps.hasMMX)
2242 {
2243 if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
0d9f3d85 2244 MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
28bf81c9
MN
2245 else
2246 {
2247 if(c->hLumFilterSize==4)
0d9f3d85 2248 MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
28bf81c9 2249 else if(c->hLumFilterSize==8)
0d9f3d85 2250 MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
28bf81c9 2251 else
0d9f3d85 2252 MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
28bf81c9
MN
2253
2254 if(c->hChrFilterSize==4)
0d9f3d85 2255 MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
28bf81c9 2256 else if(c->hChrFilterSize==8)
0d9f3d85 2257 MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
28bf81c9 2258 else
0d9f3d85 2259 MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
28bf81c9
MN
2260 }
2261 }
2262 else
2263 {
2264#ifdef ARCH_X86
0d9f3d85 2265 MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
28bf81c9
MN
2266#else
2267 if(flags & SWS_FAST_BILINEAR)
0d9f3d85 2268 MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
28bf81c9 2269 else
0d9f3d85 2270 MSG_V("SwScaler: using C scaler for horizontal scaling\n");
28bf81c9
MN
2271#endif
2272 }
6c7506de 2273 if(isPlanarYUV(dstFormat))
28bf81c9
MN
2274 {
2275 if(c->vLumFilterSize==1)
0d9f3d85 2276 MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
28bf81c9 2277 else
0d9f3d85 2278 MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
28bf81c9
MN
2279 }
2280 else
2281 {
2282 if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
0d9f3d85 2283 MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
28bf81c9
MN
2284 "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C");
2285 else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
0d9f3d85 2286 MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
28bf81c9 2287 else
0d9f3d85 2288 MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
28bf81c9
MN
2289 }
2290
2291 if(dstFormat==IMGFMT_BGR24)
0d9f3d85 2292 MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
28bf81c9 2293 cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C"));
fd284805 2294 else if(dstFormat==IMGFMT_BGR32)
0d9f3d85 2295 MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
fd284805 2296 else if(dstFormat==IMGFMT_BGR16)
0d9f3d85 2297 MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
fd284805 2298 else if(dstFormat==IMGFMT_BGR15)
0d9f3d85 2299 MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
28bf81c9 2300
0d9f3d85 2301 MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
28bf81c9 2302 }
1e621b18
MN
2303 if((flags & SWS_PRINT_INFO) && verbose>1)
2304 {
0d9f3d85 2305 MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1e621b18 2306 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
0d9f3d85 2307 MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1e621b18
MN
2308 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2309 }
37079906
MN
2310
2311 c->swScale= swScale;
28bf81c9
MN
2312 return c;
2313}
2314
2315/**
2316 * returns a normalized gaussian curve used to filter stuff
2317 * quality=3 is high quality, lowwer is lowwer quality
2318 */
c7f822d9
MN
2319
2320SwsVector *getGaussianVec(double variance, double quality){
28bf81c9
MN
2321 const int length= (int)(variance*quality + 0.5) | 1;
2322 int i;
2323 double *coeff= memalign(sizeof(double), length*sizeof(double));
2324 double middle= (length-1)*0.5;
c7f822d9
MN
2325 SwsVector *vec= malloc(sizeof(SwsVector));
2326
2327 vec->coeff= coeff;
2328 vec->length= length;
28bf81c9
MN
2329
2330 for(i=0; i<length; i++)
2331 {
2332 double dist= i-middle;
2333 coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2334 }
2335
c7f822d9
MN
2336 normalizeVec(vec, 1.0);
2337
2338 return vec;
28bf81c9
MN
2339}
2340
5521b193
MN
2341SwsVector *getConstVec(double c, int length){
2342 int i;
2343 double *coeff= memalign(sizeof(double), length*sizeof(double));
2344 SwsVector *vec= malloc(sizeof(SwsVector));
2345
2346 vec->coeff= coeff;
2347 vec->length= length;
2348
2349 for(i=0; i<length; i++)
2350 coeff[i]= c;
2351
2352 return vec;
2353}
2354
2355
c7f822d9
MN
2356SwsVector *getIdentityVec(void){
2357 double *coeff= memalign(sizeof(double), sizeof(double));
2358 SwsVector *vec= malloc(sizeof(SwsVector));
2359 coeff[0]= 1.0;
2360
2361 vec->coeff= coeff;
2362 vec->length= 1;
2363
2364 return vec;
2365}
2366
2367void normalizeVec(SwsVector *a, double height){
28bf81c9
MN
2368 int i;
2369 double sum=0;
2370 double inv;
2371
c7f822d9
MN
2372 for(i=0; i<a->length; i++)
2373 sum+= a->coeff[i];
28bf81c9
MN
2374
2375 inv= height/sum;
2376
c7f822d9
MN
2377 for(i=0; i<a->length; i++)
2378 a->coeff[i]*= height;
28bf81c9
MN
2379}
2380
c7f822d9
MN
2381void scaleVec(SwsVector *a, double scalar){
2382 int i;
2383
2384 for(i=0; i<a->length; i++)
2385 a->coeff[i]*= scalar;
2386}
2387
5cebb24b 2388static SwsVector *getConvVec(SwsVector *a, SwsVector *b){
c7f822d9 2389 int length= a->length + b->length - 1;
28bf81c9
MN
2390 double *coeff= memalign(sizeof(double), length*sizeof(double));
2391 int i, j;
c7f822d9
MN
2392 SwsVector *vec= malloc(sizeof(SwsVector));
2393
2394 vec->coeff= coeff;
2395 vec->length= length;
28bf81c9
MN
2396
2397 for(i=0; i<length; i++) coeff[i]= 0.0;
2398
c7f822d9 2399 for(i=0; i<a->length; i++)
28bf81c9 2400 {
c7f822d9 2401 for(j=0; j<b->length; j++)
28bf81c9 2402 {
c7f822d9 2403 coeff[i+j]+= a->coeff[i]*b->coeff[j];
28bf81c9
MN
2404 }
2405 }
2406
c7f822d9 2407 return vec;
28bf81c9
MN
2408}
2409
5cebb24b 2410static SwsVector *sumVec(SwsVector *a, SwsVector *b){
c7f822d9 2411 int length= MAX(a->length, b->length);
28bf81c9
MN
2412 double *coeff= memalign(sizeof(double), length*sizeof(double));
2413 int i;
c7f822d9
MN
2414 SwsVector *vec= malloc(sizeof(SwsVector));
2415
2416 vec->coeff= coeff;
2417 vec->length= length;
28bf81c9
MN
2418
2419 for(i=0; i<length; i++) coeff[i]= 0.0;
2420
c7f822d9
MN
2421 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2422 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];
2423
2424 return vec;
28bf81c9 2425}
c7f822d9 2426
5cebb24b 2427static SwsVector *diffVec(SwsVector *a, SwsVector *b){
c7f822d9
MN
2428 int length= MAX(a->length, b->length);
2429 double *coeff= memalign(sizeof(double), length*sizeof(double));
2430 int i;
2431 SwsVector *vec= malloc(sizeof(SwsVector));
2432
2433 vec->coeff= coeff;
2434 vec->length= length;
2435
2436 for(i=0; i<length; i++) coeff[i]= 0.0;
2437
2438 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2439 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];
2440
2441 return vec;
2442}
2443
2444/* shift left / or right if "shift" is negative */
5cebb24b 2445static SwsVector *getShiftedVec(SwsVector *a, int shift){
c7f822d9
MN
2446 int length= a->length + ABS(shift)*2;
2447 double *coeff= memalign(sizeof(double), length*sizeof(double));
ff7ba856 2448 int i;
c7f822d9
MN
2449 SwsVector *vec= malloc(sizeof(SwsVector));
2450
2451 vec->coeff= coeff;
2452 vec->length= length;
2453
2454 for(i=0; i<length; i++) coeff[i]= 0.0;
2455
2456 for(i=0; i<a->length; i++)
2457 {
2458 coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2459 }
2460
2461 return vec;
2462}
2463
5cebb24b
MN
2464void shiftVec(SwsVector *a, int shift){
2465 SwsVector *shifted= getShiftedVec(a, shift);
2466 free(a->coeff);
2467 a->coeff= shifted->coeff;
2468 a->length= shifted->length;
2469 free(shifted);
2470}
2471
2472void addVec(SwsVector *a, SwsVector *b){
2473 SwsVector *sum= sumVec(a, b);
2474 free(a->coeff);
2475 a->coeff= sum->coeff;
2476 a->length= sum->length;
2477 free(sum);
2478}
2479
2480void subVec(SwsVector *a, SwsVector *b){
2481 SwsVector *diff= diffVec(a, b);
2482 free(a->coeff);
2483 a->coeff= diff->coeff;
2484 a->length= diff->length;
2485 free(diff);
2486}
2487
2488void convVec(SwsVector *a, SwsVector *b){
2489 SwsVector *conv= getConvVec(a, b);
2490 free(a->coeff);
2491 a->coeff= conv->coeff;
2492 a->length= conv->length;
2493 free(conv);
2494}
2495
2496SwsVector *cloneVec(SwsVector *a){
2497 double *coeff= memalign(sizeof(double), a->length*sizeof(double));
2498 int i;
2499 SwsVector *vec= malloc(sizeof(SwsVector));
2500
2501 vec->coeff= coeff;
2502 vec->length= a->length;
2503
2504 for(i=0; i<a->length; i++) coeff[i]= a->coeff[i];
2505
2506 return vec;
2507}
2508
c7f822d9
MN
2509void printVec(SwsVector *a){
2510 int i;
2511 double max=0;
2512 double min=0;
2513 double range;
2514
2515 for(i=0; i<a->length; i++)
2516 if(a->coeff[i]>max) max= a->coeff[i];
2517
2518 for(i=0; i<a->length; i++)
2519 if(a->coeff[i]<min) min= a->coeff[i];
2520
2521 range= max - min;
2522
2523 for(i=0; i<a->length; i++)
2524 {
2525 int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
0d9f3d85
A
2526 MSG_DBG2("%1.3f ", a->coeff[i]);
2527 for(;x>0; x--) MSG_DBG2(" ");
2528 MSG_DBG2("|\n");
c7f822d9
MN
2529 }
2530}
2531
2532void freeVec(SwsVector *a){
2533 if(!a) return;
2534 if(a->coeff) free(a->coeff);
2535 a->coeff=NULL;
2536 a->length=0;
2537 free(a);
2538}
2539
2540void freeSwsContext(SwsContext *c){
2541 int i;
c7f822d9
MN
2542 if(!c) return;
2543
2544 if(c->lumPixBuf)
2545 {
6c7506de 2546 for(i=0; i<c->vLumBufSize; i++)
c7f822d9
MN
2547 {
2548 if(c->lumPixBuf[i]) free(c->lumPixBuf[i]);
2549 c->lumPixBuf[i]=NULL;
2550 }
2551 free(c->lumPixBuf);
2552 c->lumPixBuf=NULL;
2553 }
2554
2555 if(c->chrPixBuf)
2556 {
6c7506de 2557 for(i=0; i<c->vChrBufSize; i++)
c7f822d9
MN
2558 {
2559 if(c->chrPixBuf[i]) free(c->chrPixBuf[i]);
2560 c->chrPixBuf[i]=NULL;
2561 }
2562 free(c->chrPixBuf);
2563 c->chrPixBuf=NULL;
2564 }
2565
2566 if(c->vLumFilter) free(c->vLumFilter);
2567 c->vLumFilter = NULL;
2568 if(c->vChrFilter) free(c->vChrFilter);
2569 c->vChrFilter = NULL;
2570 if(c->hLumFilter) free(c->hLumFilter);
2571 c->hLumFilter = NULL;
2572 if(c->hChrFilter) free(c->hChrFilter);
2573 c->hChrFilter = NULL;
2574
2575 if(c->vLumFilterPos) free(c->vLumFilterPos);
2576 c->vLumFilterPos = NULL;
2577 if(c->vChrFilterPos) free(c->vChrFilterPos);
2578 c->vChrFilterPos = NULL;
2579 if(c->hLumFilterPos) free(c->hLumFilterPos);
2580 c->hLumFilterPos = NULL;
2581 if(c->hChrFilterPos) free(c->hChrFilterPos);
2582 c->hChrFilterPos = NULL;
2583
2584 if(c->lumMmxFilter) free(c->lumMmxFilter);
2585 c->lumMmxFilter = NULL;
2586 if(c->chrMmxFilter) free(c->chrMmxFilter);
2587 c->chrMmxFilter = NULL;
2588
b7dc6f66
MN
2589 if(c->lumMmx2Filter) free(c->lumMmx2Filter);
2590 c->lumMmx2Filter=NULL;
2591 if(c->chrMmx2Filter) free(c->chrMmx2Filter);
2592 c->chrMmx2Filter=NULL;
2593 if(c->lumMmx2FilterPos) free(c->lumMmx2FilterPos);
2594 c->lumMmx2FilterPos=NULL;
2595 if(c->chrMmx2FilterPos) free(c->chrMmx2FilterPos);
2596 c->chrMmx2FilterPos=NULL;
2597
c7f822d9
MN
2598 free(c);
2599}
2600
7f56a527 2601