typo in a comment ...
[libav.git] / postproc / rgb2rgb.c
CommitLineData
fcfbc150 1/*
a3aece93
NK
2 *
3 * rgb2rgb.c, Software RGB to RGB convertor
6611aa83
NK
4 * pluralize by Software PAL8 to RGB convertor
5 * Software YUV to YUV convertor
6 * Software YUV to RGB convertor
a3aece93 7 * Written by Nick Kurshev.
1de97d84 8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL)
a3aece93 9 */
b234ae81
NK
10#include <inttypes.h>
11#include "../config.h"
12#include "rgb2rgb.h"
1de97d84 13#include "../cpudetect.h"
854288bb 14#include "../mangle.h"
6c1baeb0 15#include "../bswap.h"
005ba718 16#include "../libvo/fastmemcpy.h"
a3aece93 17
1de97d84
MN
18#ifdef ARCH_X86
19#define CAN_COMPILE_X86_ASM
20#endif
21
21316f3c
MN
22#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
23
1de97d84 24#ifdef CAN_COMPILE_X86_ASM
0d9f3d85
A
/*
 * 64-bit constants, 8-byte aligned. They are only compiled when
 * CAN_COMPILE_X86_ASM is set; presumably they are referenced as memory
 * operands by the MMX code in rgb2rgb_template.c (not visible here).
 */

/* all bits clear / all bits set */
static const uint64_t mmx_null   __attribute__((aligned(8))) = 0x0000000000000000ULL;
static const uint64_t mmx_one    __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;

/* one byte (or the low three bytes) of each 32-bit half */
static const uint64_t mask32b    __attribute__((aligned(8))) = 0x000000FF000000FFULL;
static const uint64_t mask32g    __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
static const uint64_t mask32r    __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
static const uint64_t mask32     __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;

/* masks and multipliers named for the 32 -> 16/15 bit paths */
static const uint64_t mask3216br __attribute__((aligned(8))) = 0x00F800F800F800F8ULL;
static const uint64_t mask3216g  __attribute__((aligned(8))) = 0x0000FC000000FC00ULL;
static const uint64_t mask3215g  __attribute__((aligned(8))) = 0x0000F8000000F800ULL;
static const uint64_t mul3216    __attribute__((aligned(8))) = 0x2000000420000004ULL;
static const uint64_t mul3215    __attribute__((aligned(8))) = 0x2000000820000008ULL;

/* every third byte, at three different phases */
static const uint64_t mask24b    __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL;
static const uint64_t mask24g    __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL;
static const uint64_t mask24r    __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL;

/* low / next three bytes, then the top 2, 4 and 6 bytes of the quadword */
static const uint64_t mask24l    __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
static const uint64_t mask24h    __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
static const uint64_t mask24hh   __attribute__((aligned(8))) = 0xffff000000000000ULL;
static const uint64_t mask24hhh  __attribute__((aligned(8))) = 0xffffffff00000000ULL;
static const uint64_t mask24hhhh __attribute__((aligned(8))) = 0xffffffffffff0000ULL;

/* four 16-bit lanes, 5-5-5 field layout */
static const uint64_t mask15b    __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
static const uint64_t mask15rg   __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
static const uint64_t mask15s    __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
static const uint64_t mask15g    __attribute__((aligned(8))) = 0x03E003E003E003E0ULL;
static const uint64_t mask15r    __attribute__((aligned(8))) = 0x7C007C007C007C00ULL;

/* four 16-bit lanes, 5-6-5 field layout; the low 5-bit field is shared with 5-5-5 */
#define mask16b mask15b
static const uint64_t mask16g    __attribute__((aligned(8))) = 0x07E007E007E007E0ULL;
static const uint64_t mask16r    __attribute__((aligned(8))) = 0xF800F800F800F800ULL;
53445e83
NK
/*
 * Per-field masks with the same 16-bit pattern in the low word of each
 * 32-bit half of the quadword.
 * The _16 set selects 5-6-5 fields (f800 / 07e0 / 001f), the _15 set
 * selects 5-5-5 fields (7c00 / 03e0 / 001f).
 *
 * Fix: red_15mask and green_15mask previously carried the 5-6-5 values
 * (f800, 07e0) in their low half, so the two halves disagreed and the low
 * half selected the wrong bits for a 5-5-5 pixel.
 * NOTE(review): presumably consumed by the MMX 32/24 -> 15 bit code in
 * rgb2rgb_template.c - confirm against the template.
 */
static const uint64_t red_16mask   __attribute__((aligned(8))) = 0x0000f8000000f800ULL;
static const uint64_t green_16mask __attribute__((aligned(8))) = 0x000007e0000007e0ULL;
static const uint64_t blue_16mask  __attribute__((aligned(8))) = 0x0000001f0000001fULL;
static const uint64_t red_15mask   __attribute__((aligned(8))) = 0x00007c0000007c00ULL;
static const uint64_t green_15mask __attribute__((aligned(8))) = 0x000003e0000003e0ULL;
static const uint64_t blue_15mask  __attribute__((aligned(8))) = 0x0000001f0000001fULL;
21316f3c
MN
58
/*
 * Packed 16-bit coefficient words for the bgr2Y/U/V constants.
 * FAST_BGR2YV12 selects the 7-bit coefficient set, otherwise the 15-bit
 * set is used.
 */
#ifndef FAST_BGR2YV12
static const uint64_t bgr2YCoeff   __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff   __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff   __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#else
static const uint64_t bgr2YCoeff   __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff   __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff   __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#endif
/* 0x10 and 0x80 replicated into every byte, and 1 replicated into every 16-bit word */
static const uint64_t bgr2YOffset  __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset __attribute__((aligned(8))) = 0x8080808080808080ULL;
static const uint64_t w1111        __attribute__((aligned(8))) = 0x0001000100010001ULL;
71
99969243
MN
72#if 0
73static volatile uint64_t __attribute__((aligned(8))) b5Dither;
74static volatile uint64_t __attribute__((aligned(8))) g5Dither;
75static volatile uint64_t __attribute__((aligned(8))) g6Dither;
76static volatile uint64_t __attribute__((aligned(8))) r5Dither;
77
78static uint64_t __attribute__((aligned(8))) dither4[2]={
79 0x0103010301030103LL,
80 0x0200020002000200LL,};
81
82static uint64_t __attribute__((aligned(8))) dither8[2]={
83 0x0602060206020602LL,
84 0x0004000400040004LL,};
85#endif
e697a141 86#endif
79811694 87
1de97d84
MN
/*
 * Integer weights: each is a real factor scaled by 2^RGB2YUV_SHIFT, with
 * 0.5 added before the truncating cast to int.
 * Grouped by the second letter of the name (presumably the Y, U and V
 * output component; the first letter the B, G or R input).
 */
#define RGB2YUV_SHIFT 8

#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))

#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))

#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
98
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
100//Plain C versions
101#undef HAVE_MMX
102#undef HAVE_MMX2
103#undef HAVE_3DNOW
104#undef ARCH_X86
52075cde 105#undef HAVE_SSE2
1de97d84
MN
106#define RENAME(a) a ## _C
107#include "rgb2rgb_template.c"
108
109#ifdef CAN_COMPILE_X86_ASM
110
111//MMX versions
112#undef RENAME
113#define HAVE_MMX
114#undef HAVE_MMX2
115#undef HAVE_3DNOW
52075cde 116#undef HAVE_SSE2
1de97d84
MN
117#define ARCH_X86
118#define RENAME(a) a ## _MMX
119#include "rgb2rgb_template.c"
120
121//MMX2 versions
122#undef RENAME
123#define HAVE_MMX
124#define HAVE_MMX2
125#undef HAVE_3DNOW
52075cde 126#undef HAVE_SSE2
1de97d84
MN
127#define ARCH_X86
128#define RENAME(a) a ## _MMX2
129#include "rgb2rgb_template.c"
130
131//3DNOW versions
132#undef RENAME
133#define HAVE_MMX
134#undef HAVE_MMX2
135#define HAVE_3DNOW
52075cde 136#undef HAVE_SSE2
1de97d84
MN
137#define ARCH_X86
138#define RENAME(a) a ## _3DNow
139#include "rgb2rgb_template.c"
140
141#endif //CAN_COMPILE_X86_ASM
142
/**
 * Run-time dispatcher for rgb24to32.
 * With CAN_COMPILE_X86_ASM the first variant that gCpuCaps reports as
 * available is used, tried fastest first (MMX2, 3DNow, MMX); in every
 * other case the plain C version runs. Arguments are forwarded unchanged.
 */
void rgb24to32(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb24to32_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24to32_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24to32_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24to32_C(src, dst, src_size);
}
157
/**
 * Run-time dispatcher for rgb15to24: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb15to24(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb15to24_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb15to24_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb15to24_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb15to24_C(src, dst, src_size);
}
172
/**
 * Run-time dispatcher for rgb16to24: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb16to24(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb16to24_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb16to24_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb16to24_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb16to24_C(src, dst, src_size);
}
187
/**
 * Run-time dispatcher for rgb15to32: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb15to32(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb15to32_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb15to32_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb15to32_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb15to32_C(src, dst, src_size);
}
202
/**
 * Run-time dispatcher for rgb16to32: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb16to32(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb16to32_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb16to32_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb16to32_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb16to32_C(src, dst, src_size);
}
59ac5a93 217
/**
 * Run-time dispatcher for rgb32to24: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb32to24(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb32to24_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32to24_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32to24_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32to24_C(src, dst, src_size);
}
b238eb2e 232
a3aece93
NK
233/*
234 Original by Strepto/Astral
235 ported to gcc & bugfixed : A'rpi
51da31f1 236 MMX2, 3DNOW optimization by Nick Kurshev
9b2c28e6 237 32bit c version, and and&add trick by Michael Niedermayer
a3aece93 238*/
/*
 * Run-time dispatcher for rgb15to16: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb15to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb15to16_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb15to16_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb15to16_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb15to16_C(src, dst, src_size);
}
fcfbc150 253
ac4d0aea
MN
/**
 * Run-time dispatcher for rgb16to15: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb16to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb16to15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb16to15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb16to15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb16to15_C(src, dst, src_size);
}
fcfbc150
MN
/**
 * Expands 8-bit palette indices to 4-byte pixels.
 * The palette is assumed to contain bgr32 entries (4 bytes per index); the
 * first three bytes of each entry are written in reverse order (2, 1, 0).
 *
 * @param src        num_pixels palette indices
 * @param dst        output, 4 bytes per pixel; the 4th byte of every pixel
 *                   is left untouched
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, read at index*4 + 0..2
 */
void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint8_t *const src_end = src + num_pixels;

    /* Bytes are stored one at a time (FIXME slow?); copying the whole
       32-bit entry would not reverse the byte order. */
    while (src < src_end) {
        const uint8_t *entry = palette + *src * 4;

        dst[0] = entry[2];
        dst[1] = entry[1];
        dst[2] = entry[0];
        /* dst[3] is deliberately not cleared (open question in the original) */
        dst += 4;
        src++;
    }
}
9780c7ff 290
6c1baeb0
AB
/**
 * Expands 8-bit palette indices to 4-byte pixels, copying the first three
 * bytes of each 4-byte palette entry in their stored order (0, 1, 2).
 *
 * @param src        num_pixels palette indices
 * @param dst        output, 4 bytes per pixel; the 4th byte of every pixel
 *                   is left untouched
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, read at index*4 + 0..2
 */
void palette8tobgr32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint8_t *const src_end = src + num_pixels;

    /* FIXME slow? */
    while (src < src_end) {
        const uint8_t *entry = palette + *src * 4;

        dst[0] = entry[0];
        dst[1] = entry[1];
        dst[2] = entry[2];
        /* dst[3] is deliberately not cleared (open question in the original) */
        dst += 4;
        src++;
    }
}
6c1baeb0 304
9ce6584e
MN
/**
 * Expands 8-bit palette indices to 3-byte pixels.
 * The palette is assumed to contain bgr32 entries (4 bytes per index); the
 * first three bytes of each entry are written in reverse order (2, 1, 0).
 *
 * @param src        num_pixels palette indices
 * @param dst        output, 3 bytes per pixel
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, read at index*4 + 0..2
 */
void palette8torgb24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint8_t *const src_end = src + num_pixels;

    /* Byte stores on purpose (FIXME slow?): a 32-bit store per pixel would
       write 1 byte too much and might cause alignment issues on some
       architectures. */
    while (src < src_end) {
        const uint8_t *entry = palette + *src * 4;

        dst[0] = entry[2];
        dst[1] = entry[1];
        dst[2] = entry[0];
        dst += 3;
        src++;
    }
}
325
6c1baeb0
AB
/**
 * Expands 8-bit palette indices to 3-byte pixels, copying the first three
 * bytes of each 4-byte palette entry in their stored order (0, 1, 2).
 *
 * @param src        num_pixels palette indices
 * @param dst        output, 3 bytes per pixel
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, read at index*4 + 0..2
 */
void palette8tobgr24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint8_t *const src_end = src + num_pixels;

    /* Byte stores on purpose (FIXME slow?): a 32-bit store per pixel would
       write 1 byte too much and might cause alignment issues on some
       architectures. */
    while (src < src_end) {
        const uint8_t *entry = palette + *src * 4;

        dst[0] = entry[0];
        dst[1] = entry[1];
        dst[2] = entry[2];
        dst += 3;
        src++;
    }
}
343
d661d18d
AB
/**
 * Run-time dispatcher for bgr24torgb24: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void bgr24torgb24(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        bgr24torgb24_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        bgr24torgb24_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        bgr24torgb24_MMX(src, dst, src_size);
        return;
    }
#endif
    bgr24torgb24_C(src, dst, src_size);
}
360
/**
 * Run-time dispatcher for rgb32to16: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb32to16_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32to16_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32to16_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32to16_C(src, dst, src_size);
}
375
/**
 * Run-time dispatcher for rgb32to15: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb32to15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32to15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32to15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32to15_C(src, dst, src_size);
}
390
996e1a7c
NK
/**
 * Run-time dispatcher for rgb24to16: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb24to16_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24to16_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24to16_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24to16_C(src, dst, src_size);
}
405
/**
 * Run-time dispatcher for rgb24to15: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb24to15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24to15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24to15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24to15_C(src, dst, src_size);
}
fcfbc150
MN
420
/**
 * Expands 8-bit palette indices to 16-bit pixels by table lookup.
 * The palette is assumed to contain bgr16 entries, see rgb32to16 to convert
 * the palette.
 *
 * @param src        num_pixels palette indices
 * @param dst        output, accessed as an array of uint16_t
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, accessed as an array of uint16_t
 */
void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t *out = (uint16_t *)dst;
    unsigned n = 0;

    while (n < num_pixels) {
        out[n] = lut[src[n]];
        n++;
    }
}
6c1baeb0
AB
/**
 * Expands 8-bit palette indices to 16-bit pixels by table lookup, passing
 * every looked-up 16-bit entry through bswap_16() before it is stored.
 *
 * @param src        num_pixels palette indices
 * @param dst        output, accessed as an array of uint16_t
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, accessed as an array of uint16_t
 */
void palette8tobgr16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t *out = (uint16_t *)dst;
    unsigned n = 0;

    while (n < num_pixels) {
        out[n] = bswap_16(lut[src[n]]);
        n++;
    }
}
fcfbc150
MN
436
/**
 * Expands 8-bit palette indices to 16-bit pixels by table lookup.
 * The palette is assumed to contain bgr15 entries, see rgb32to15 to convert
 * the palette.
 *
 * @param src        num_pixels palette indices
 * @param dst        output, accessed as an array of uint16_t
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, accessed as an array of uint16_t
 */
void palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t *out = (uint16_t *)dst;
    unsigned n = 0;

    while (n < num_pixels) {
        out[n] = lut[src[n]];
        n++;
    }
}
6c1baeb0
AB
/**
 * Expands 8-bit palette indices to 16-bit pixels by table lookup, passing
 * every looked-up 16-bit entry through bswap_16() before it is stored.
 *
 * @param src        num_pixels palette indices
 * @param dst        output, accessed as an array of uint16_t
 * @param num_pixels number of indices to convert
 * @param palette    lookup table, accessed as an array of uint16_t
 */
void palette8tobgr15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint16_t *lut = (const uint16_t *)palette;
    uint16_t *out = (uint16_t *)dst;
    unsigned n = 0;

    while (n < num_pixels) {
        out[n] = bswap_16(lut[src[n]]);
        n++;
    }
}
99969243
MN
452
/**
 * Run-time dispatcher for rgb32tobgr32: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb32tobgr32_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32tobgr32_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32tobgr32_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32tobgr32_C(src, dst, src_size);
}
467
ac4d0aea
MN
/**
 * Converts 4-byte pixels to 3-byte pixels with the first three bytes of
 * each pixel reversed; the 4th source byte is dropped.
 *
 * @param src      input, 4 bytes per pixel
 * @param dst      output, 3 bytes per pixel
 * @param src_size input size in bytes; only whole 4-byte pixels are
 *                 converted (src_size >> 2 of them)
 */
void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    unsigned remaining = src_size >> 2;

    while (remaining > 0) {
        dst[0] = src[2];
        dst[1] = src[1];
        dst[2] = src[0];
        src += 4;
        dst += 3;
        remaining--;
    }
}
479
/**
 * Run-time dispatcher for rgb32tobgr16: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb32tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb32tobgr16_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32tobgr16_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32tobgr16_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32tobgr16_C(src, dst, src_size);
}
494
/**
 * Run-time dispatcher for rgb32tobgr15: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb32tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb32tobgr15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb32tobgr15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb32tobgr15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb32tobgr15_C(src, dst, src_size);
}
509
/**
 * Converts 3-byte pixels to 4-byte pixels with the three bytes reversed
 * and the 4th output byte set to 0.
 *
 * Only whole source pixels are converted: a trailing fragment of 1 or 2
 * bytes is ignored. (The previous loop condition, 3*i < src_size, treated
 * such a fragment as a full pixel, reading past the end of src and writing
 * an extra pixel to dst; 3*i could also wrap for very large sizes.)
 *
 * @param src      input, 3 bytes per pixel
 * @param dst      output, 4 bytes per pixel, room for src_size/3 pixels
 * @param src_size input size in bytes
 */
void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    unsigned remaining = src_size / 3;

    while (remaining > 0) {
        dst[0] = src[2];
        dst[1] = src[1];
        dst[2] = src[0];
        dst[3] = 0;
        src += 3;
        dst += 4;
        remaining--;
    }
}
521
74d35835
MN
/**
 * Run-time dispatcher for rgb24tobgr24: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb24tobgr24_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24tobgr24_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24tobgr24_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24tobgr24_C(src, dst, src_size);
}
536
ac4d0aea
MN
/**
 * Run-time dispatcher for rgb24tobgr16: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb24tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb24tobgr16_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24tobgr16_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24tobgr16_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24tobgr16_C(src, dst, src_size);
}
551
/**
 * Run-time dispatcher for rgb24tobgr15: forwards to the MMX2, 3DNow or MMX
 * variant (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is
 * set and gCpuCaps reports it, otherwise to the C version.
 */
void rgb24tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb24tobgr15_MMX2(src, dst, src_size);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24tobgr15_3DNow(src, dst, src_size);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24tobgr15_MMX(src, dst, src_size);
        return;
    }
#endif
    rgb24tobgr15_C(src, dst, src_size);
}
566
/**
 * Expands 16-bit 5-6-5 pixels to 4 bytes each.
 * Per pixel the bytes written are, in order: bits 11-15, bits 5-10 and
 * bits 0-4 of the source word, each moved to the top of its byte, then 0.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, 4 bytes per pixel
 * @param src_size input size in bytes (src_size/2 pixels are converted)
 */
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const in_end = in + src_size/2;

    for (; in < in_end; in++, dst += 4) {
        const unsigned px = *in;

        dst[0] = (px & 0xF800) >> 8;
        dst[1] = (px & 0x7E0) >> 3;
        dst[2] = (px & 0x1F) << 3;
        dst[3] = 0;
    }
}
583
/**
 * Expands 16-bit 5-6-5 pixels to 3 bytes each.
 * Per pixel the bytes written are, in order: bits 11-15, bits 5-10 and
 * bits 0-4 of the source word, each moved to the top of its byte.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, 3 bytes per pixel
 * @param src_size input size in bytes (src_size/2 pixels are converted)
 */
void rgb16tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const in_end = in + src_size/2;

    for (; in < in_end; in++, dst += 3) {
        const unsigned px = *in;

        dst[0] = (px & 0xF800) >> 8;
        dst[1] = (px & 0x7E0) >> 3;
        dst[2] = (px & 0x1F) << 3;
    }
}
599
/**
 * Swaps the two 5-bit fields of every 16-bit 5-6-5 pixel: bits 11-15 move
 * to bits 0-4 and vice versa, the 6-bit middle field stays in place.
 *
 * Fix: the previous code read src[2*i] and wrote dst[2*i] through the
 * uint8_t pointers, i.e. it looked at only one byte of each pixel and
 * stored only one byte of the result. Pixels are now read and written as
 * whole uint16_t words, like the other 16-bit converters in this file.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, written as uint16_t words
 * @param src_size input size in bytes (src_size >> 1 pixels are converted)
 */
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    uint16_t *out = (uint16_t *)dst;
    unsigned num_pixels = src_size >> 1;
    unsigned i;

    for (i = 0; i < num_pixels; i++) {
        const unsigned px = in[i];

        out[i] = (uint16_t)((px >> 11) | (px & 0x07E0) | ((px & 0x1F) << 11));
    }
}
616
/**
 * Converts 16-bit 5-6-5 pixels to 5-5-5 with the two outer fields swapped:
 * source bits 11-15 land in bits 0-4, source bits 0-4 in bits 10-14, and
 * the 6-bit middle field is reduced to 5 bits in bits 5-9. Bit 15 of the
 * result is 0.
 *
 * Fixes:
 *  - pixels are read and written as whole uint16_t words (the previous
 *    code used src[2*i] / dst[2*i] on the byte pointers and so handled
 *    only one byte of each pixel);
 *  - the 6-bit field is narrowed by dropping its least significant bit;
 *    the previous "& 0x1F" dropped the most significant bit instead.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, written as uint16_t words
 * @param src_size input size in bytes (src_size >> 1 pixels are converted)
 */
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    uint16_t *out = (uint16_t *)dst;
    unsigned num_pixels = src_size >> 1;
    unsigned i;

    for (i = 0; i < num_pixels; i++) {
        const unsigned px = in[i];

        /* 0x07C0 = upper five bits of the six-bit middle field */
        out[i] = (uint16_t)((px >> 11) | ((px & 0x07C0) >> 1) | ((px & 0x1F) << 10));
    }
}
633
/**
 * Expands 16-bit 5-5-5 pixels to 4 bytes each.
 * Per pixel the bytes written are, in order: bits 10-14, bits 5-9 and
 * bits 0-4 of the source word, each moved to the top of its byte, then 0.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, 4 bytes per pixel
 * @param src_size input size in bytes (src_size/2 pixels are converted)
 */
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const in_end = in + src_size/2;

    for (; in < in_end; in++, dst += 4) {
        const unsigned px = *in;

        dst[0] = (px & 0x7C00) >> 7;
        dst[1] = (px & 0x3E0) >> 2;
        dst[2] = (px & 0x1F) << 3;
        dst[3] = 0;
    }
}
650
/**
 * Expands 16-bit 5-5-5 pixels to 3 bytes each.
 * Per pixel the bytes written are, in order: bits 10-14, bits 5-9 and
 * bits 0-4 of the source word, each moved to the top of its byte.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, 3 bytes per pixel
 * @param src_size input size in bytes (src_size/2 pixels are converted)
 */
void rgb15tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const in_end = in + src_size/2;

    for (; in < in_end; in++, dst += 3) {
        const unsigned px = *in;

        dst[0] = (px & 0x7C00) >> 7;
        dst[1] = (px & 0x3E0) >> 2;
        dst[2] = (px & 0x1F) << 3;
    }
}
666
/**
 * Converts 16-bit 5-5-5 pixels to 5-6-5 with the two outer fields swapped:
 * source bits 10-14 land in bits 0-4, source bits 0-4 in bits 11-15, and
 * the 5-bit middle field is widened to 6 bits (shifted up by one, low bit
 * 0) in bits 5-10. Source bit 15 is ignored.
 *
 * Fixes:
 *  - pixels are read and written as whole uint16_t words (the previous
 *    code used src[2*i] / dst[2*i] on the byte pointers and so handled
 *    only one byte of each pixel);
 *  - the 5-bit middle field is scaled into the 6-bit field; the previous
 *    code stored it unshifted, halving its value.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, written as uint16_t words
 * @param src_size input size in bytes (src_size >> 1 pixels are converted)
 */
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    uint16_t *out = (uint16_t *)dst;
    unsigned num_pixels = src_size >> 1;
    unsigned i;

    for (i = 0; i < num_pixels; i++) {
        const unsigned px = in[i];

        out[i] = (uint16_t)(((px & 0x7C00) >> 10) | ((px & 0x03E0) << 1) | ((px & 0x1F) << 11));
    }
}
683
/**
 * Swaps the two outer 5-bit fields of every 16-bit 5-5-5 pixel: bits 10-14
 * move to bits 0-4 and vice versa, the middle field stays in place. Bit 15
 * of the result is 0.
 *
 * Fix: the previous code read src[2*i] and wrote dst[2*i] through the
 * uint8_t pointers, i.e. it looked at only one byte of each pixel and
 * stored only one byte of the result. Pixels are now read and written as
 * whole uint16_t words, like the other 16-bit converters in this file.
 *
 * @param src      input, read as uint16_t words
 * @param dst      output, written as uint16_t words
 * @param src_size input size in bytes (src_size >> 1 pixels are converted)
 */
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    uint16_t *out = (uint16_t *)dst;
    unsigned num_pixels = src_size >> 1;
    unsigned i;

    for (i = 0; i < num_pixels; i++) {
        const unsigned px = in[i];

        out[i] = (uint16_t)(((px & 0x7C00) >> 10) | (px & 0x03E0) | ((px & 0x1F) << 10));
    }
}
700
/**
 * Swaps the outer fields of 8-bit 3-3-2 pixels.
 * The source byte is split into bits 0-2, bits 3-5 and bits 6-7. The
 * result holds the former top 2-bit field in bits 0-2 (shifted up by one,
 * low bit 0), the middle field unchanged in bits 3-5, and the former low
 * 3-bit field reduced to 2 bits in bits 6-7.
 *
 * Fix: the 3-bit field is narrowed by dropping its least significant bit;
 * the previous "& 0x03" kept the two low bits and discarded the most
 * significant one.
 *
 * @param src      input, one byte per pixel
 * @param dst      output, one byte per pixel
 * @param src_size number of bytes (= pixels) to convert
 */
void rgb8tobgr8(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
    unsigned i;

    for (i = 0; i < src_size; i++) {
        const unsigned px   = src[i];
        const unsigned lo3  = px & 0x07;
        const unsigned mid3 = (px & 0x38) >> 3;
        const unsigned hi2  = (px & 0xC0) >> 6;

        dst[i] = (uint8_t)((hi2 << 1) | (mid3 << 3) | ((lo3 >> 1) << 6));
    }
}
716
4060205b
MN
/**
 * Run-time dispatcher for yv12toyuy2.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16
 * (if this is a problem for anyone then tell me, and I'll fix it).
 *
 * Forwards all arguments unchanged to the MMX2, 3DNow or MMX variant
 * (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is set and
 * gCpuCaps reports it, otherwise to the C version.
 */
void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                unsigned int width, unsigned int height,
                int lumStride, int chromStride, int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        yv12toyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        yv12toyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
        return;
    }
#endif
    yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
}
738
dabcdbc4
MN
/**
 * Run-time dispatcher for yuv422ptoyuy2.
 *
 * Width should be a multiple of 16.
 *
 * Forwards all arguments unchanged to the MMX2, 3DNow or MMX variant
 * (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is set and
 * gCpuCaps reports it, otherwise to the C version.
 */
void yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                   unsigned int width, unsigned int height,
                   int lumStride, int chromStride, int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        yuv422ptoyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        yuv422ptoyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
        return;
    }
#endif
    yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
}
759
/**
 * Run-time dispatcher for yuy2toyv12.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16
 * (if this is a problem for anyone then tell me, and I'll fix it).
 *
 * Forwards all arguments unchanged to the MMX2, 3DNow or MMX variant
 * (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is set and
 * gCpuCaps reports it, otherwise to the C version.
 */
void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                unsigned int width, unsigned int height,
                int lumStride, int chromStride, int srcStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        yuy2toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        yuy2toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
#endif
    yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
}
81c0590e
A
781
/**
 * Run-time dispatcher for uyvytoyv12.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16
 * (if this is a problem for anyone then tell me, and I'll fix it).
 * Chrominance data is only taken from every second line, the others are
 * ignored. FIXME: write HQ version.
 *
 * Forwards all arguments unchanged to the MMX2, 3DNow or MMX variant
 * (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is set and
 * gCpuCaps reports it, otherwise to the C version.
 */
void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                unsigned int width, unsigned int height,
                int lumStride, int chromStride, int srcStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        uyvytoyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        uyvytoyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        uyvytoyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
#endif
    uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
}
806
d661d18d
AB
/**
 * Run-time dispatcher for yvu9toyv12: forwards all arguments unchanged to
 * the MMX2, 3DNow or MMX variant (tried in that order, fastest first) when
 * CAN_COMPILE_X86_ASM is set and gCpuCaps reports it, otherwise to the C
 * version.
 */
void yvu9toyv12(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
                uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                unsigned int width, unsigned int height,
                int lumStride, int chromStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        yvu9toyv12_MMX2(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        yvu9toyv12_3DNow(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        yvu9toyv12_MMX(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
        return;
    }
#endif
    yvu9toyv12_C(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
}
826
b241cbf2
MN
/**
 * Run-time dispatcher for planar2x: forwards all arguments unchanged to
 * the MMX2 or 3DNow variant (tried in that order, fastest first) when
 * CAN_COMPILE_X86_ASM is set and gCpuCaps reports it, otherwise to the C
 * version. No plain-MMX variant is selected here, so MMX-only CPUs also
 * take the C path.
 */
void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        planar2x_MMX2(src, dst, width, height, srcStride, dstStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        planar2x_3DNow(src, dst, width, height, srcStride, dstStride);
        return;
    }
#endif
    planar2x_C(src, dst, width, height, srcStride, dstStride);
}
839
1de97d84
MN
/**
 * Run-time dispatcher for rgb24toyv12.
 *
 * Height should be a multiple of 2 and width should be a multiple of 2
 * (if this is a problem for anyone then tell me, and I'll fix it).
 * Chrominance data is only taken from every second line, the others are
 * ignored. FIXME: write HQ version.
 *
 * Forwards all arguments unchanged to the MMX2, 3DNow or MMX variant
 * (tried in that order, fastest first) when CAN_COMPILE_X86_ASM is set and
 * gCpuCaps reports it, otherwise to the C version.
 */
void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                 unsigned int width, unsigned int height,
                 int lumStride, int chromStride, int srcStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        rgb24toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        rgb24toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
        return;
    }
#endif
    rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
}
5d55fdb4
MN
862
/**
 * Run-time dispatcher for interleaveBytes: forwards all arguments
 * unchanged to the MMX2, 3DNow or MMX variant (tried in that order,
 * fastest first) when CAN_COMPILE_X86_ASM is set and gCpuCaps reports it,
 * otherwise to the C version.
 */
void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
                     unsigned width, unsigned height, int src1Stride,
                     int src2Stride, int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        interleaveBytes_MMX2(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        interleaveBytes_3DNow(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
        return;
    }
#endif
    interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
}
ac4d0aea
MN
879
/**
 * Run-time dispatcher for vu9_to_vu12: forwards all arguments unchanged to
 * the MMX2, 3DNow or MMX variant (checked in that order) when
 * CAN_COMPILE_X86_ASM is set and gCpuCaps reports it, otherwise to the C
 * version.
 */
void vu9_to_vu12(const uint8_t *src1, const uint8_t *src2,
                 uint8_t *dst1, uint8_t *dst2,
                 unsigned width, unsigned height,
                 int srcStride1, int srcStride2,
                 int dstStride1, int dstStride2)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        vu9_to_vu12_MMX2(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
        return;
    }
    if (gCpuCaps.has3DNow) {
        vu9_to_vu12_3DNow(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
        return;
    }
    if (gCpuCaps.hasMMX) {
        vu9_to_vu12_MMX(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
        return;
    }
#endif
    vu9_to_vu12_C(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
}
897
/**
 * Run-time dispatcher for yvu9_to_yuy2: forwards all arguments unchanged
 * to the MMX2, 3DNow or MMX variant (checked in that order) when
 * CAN_COMPILE_X86_ASM is set and gCpuCaps reports it, otherwise to the C
 * version.
 */
void yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                  uint8_t *dst,
                  unsigned width, unsigned height,
                  int srcStride1, int srcStride2,
                  int srcStride3, int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
    if (gCpuCaps.hasMMX2) {
        yvu9_to_yuy2_MMX2(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
        return;
    }
    if (gCpuCaps.has3DNow) {
        yvu9_to_yuy2_3DNow(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
        return;
    }
    if (gCpuCaps.hasMMX) {
        yvu9_to_yuy2_MMX(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
        return;
    }
#endif
    yvu9_to_yuy2_C(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
}
914}