swscale: move away x86 specific code from rgb2rgb
[libav.git] / libswscale / rgb2rgb_template.c
1 /*
2 * software RGB to RGB converter
3 * software PAL8 to RGB converter
4 * software YUV to YUV converter
5 * software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lot of big-endian byte order fixes by Alex Beregszaszi
9 *
10 * This file is part of Libav.
11 *
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27 #include <stddef.h>
28
/**
 * Expand packed 3-byte pixels to 4-byte pixels with a constant 255 in the
 * added byte.
 *
 * Without HAVE_BIGENDIAN the three source bytes are copied in order and 255
 * is appended; with HAVE_BIGENDIAN 255 is written first, followed by the
 * three source bytes in reverse order.
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination, receives 4 bytes per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 3, out += 4) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
        out[0] = 255;
        out[1] = in[2];
        out[2] = in[1];
        out[3] = in[0];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
        out[3] = 255;
#endif
    }
}
52
/**
 * Reduce 4-byte pixels to packed 3-byte pixels by dropping one byte.
 *
 * Without HAVE_BIGENDIAN the first three bytes of every pixel are copied and
 * the fourth is skipped; with HAVE_BIGENDIAN the first byte is skipped and
 * the remaining three are stored in reverse order.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination, receives 3 bytes per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 4, out += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
        out[2] = in[1];
        out[1] = in[2];
        out[0] = in[3];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
#endif
    }
}
77
78 /*
79 original by Strepto/Astral
80 ported to gcc & bugfixed: A'rpi
81 MMX2, 3DNOW optimization by Nick Kurshev
82 32-bit C version, and and&add trick by Michael Niedermayer
83 */
/**
 * Widen 15-bit pixels to 16-bit pixels.
 *
 * For every 16-bit pixel, bits 5-14 are moved up by one position and bits
 * 0-4 are kept; the new bit 5 is always zero. This is done by adding the
 * pixel (bit 15 masked off) to its own bits 5-14. Two pixels are handled per
 * 32-bit word, and a trailing single pixel is handled separately.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination, same size as the source
 * @param src_size number of source bytes to convert
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in              = src;
    uint8_t *out                   = dst;
    const uint8_t *const stop      = src + src_size;
    const uint8_t *const pair_stop = stop - 3;

    /* two pixels at a time while at least four bytes remain */
    for (; in < pair_stop; in += 4, out += 4) {
        const unsigned pair = *(const uint32_t *)in;

        *(uint32_t *)out = (pair & 0x7FFF7FFF) + (pair & 0x7FE07FE0);
    }

    /* leftover pixel */
    if (in < stop) {
        const unsigned short px = *(const uint16_t *)in;

        *(uint16_t *)out = (px & 0x7FFF) + (px & 0x7FE0);
    }
}
103
/**
 * Narrow 16-bit pixels to 15-bit pixels.
 *
 * For every 16-bit pixel, bits 6-15 are moved down by one position into
 * bits 5-14, bits 0-4 are kept and the original bit 5 is dropped. Two pixels
 * are handled per 32-bit word, and a trailing single pixel is handled
 * separately.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination, same size as the source
 * @param src_size number of source bytes to convert
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in              = src;
    uint8_t *out                   = dst;
    const uint8_t *const stop      = src + src_size;
    const uint8_t *const pair_stop = stop - 3;

    /* two pixels at a time while at least four bytes remain */
    for (; in < pair_stop; in += 4, out += 4) {
        const uint32_t pair = *(const uint32_t *)in;

        *(uint32_t *)out = ((pair >> 1) & 0x7FE07FE0) | (pair & 0x001F001F);
    }

    /* leftover pixel */
    if (in < stop) {
        const uint16_t px = *(const uint16_t *)in;

        *(uint16_t *)out = ((px >> 1) & 0x7FE0) | (px & 0x001F);
    }
}
124
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 pixels.
 *
 * Each source pixel is read as one native 32-bit word. Bits 3-7, 10-15 and
 * 19-23 of that word become bits 0-4, 5-10 and 11-15 of the output pixel;
 * bits 24-31 are discarded.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;
        const int mid  = (px & 0xFC00)   >> 5;
        const int high = (px & 0xF80000) >> 8;

        *out++ = low + mid + high;
    }
}
137
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 pixels with the outer components
 * exchanged.
 *
 * Each source pixel is read as one native 32-bit word. Bits 3-7 go to output
 * bits 11-15, bits 10-15 to output bits 5-10 and bits 19-23 to output bits
 * 0-4; bits 24-31 are discarded.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     << 8;
        const int mid  = (px & 0xFC00)   >> 5;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
149
/**
 * Pack 32-bit pixels into 15-bit 5-5-5 pixels.
 *
 * Each source pixel is read as one native 32-bit word. Bits 3-7, 11-15 and
 * 19-23 of that word become bits 0-4, 5-9 and 10-14 of the output pixel;
 * output bit 15 is always zero.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;
        const int mid  = (px & 0xF800)   >> 6;
        const int high = (px & 0xF80000) >> 9;

        *out++ = low + mid + high;
    }
}
161
/**
 * Pack 32-bit pixels into 15-bit 5-5-5 pixels with the outer components
 * exchanged.
 *
 * Each source pixel is read as one native 32-bit word. Bits 3-7 go to output
 * bits 10-14, bits 11-15 to output bits 5-9 and bits 19-23 to output bits
 * 0-4; output bit 15 is always zero.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     << 7;
        const int mid  = (px & 0xF800)   >> 6;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
173
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 pixels.
 *
 * The first byte of each pixel supplies output bits 0-4 (its top 5 bits),
 * the second byte output bits 5-10 (its top 6 bits) and the third byte
 * output bits 11-15 (its top 5 bits).
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}
187
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 pixels, first byte in the top field.
 *
 * The first byte of each pixel supplies output bits 11-15 (its top 5 bits),
 * the second byte output bits 5-10 (its top 6 bits) and the third byte
 * output bits 0-4 (its top 5 bits).
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}
201
/**
 * Pack 3-byte pixels into 15-bit 5-5-5 pixels.
 *
 * The top 5 bits of the first, second and third byte of each pixel are
 * placed in output bits 0-4, 5-9 and 10-14 respectively; output bit 15 is
 * always zero.
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
215
/**
 * Pack 3-byte pixels into 15-bit 5-5-5 pixels, first byte in the top field.
 *
 * The top 5 bits of the first, second and third byte of each pixel are
 * placed in output bits 10-14, 5-9 and 0-4 respectively; output bit 15 is
 * always zero.
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination, receives one 16-bit value per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
229
230 /*
231 I use less accurate approximation here by simply left-shifting the input
232 value and filling the low order bits with zeroes. This method improves PNG
233 compression but this scheme cannot reproduce white exactly, since it does
234 not generate an all-ones maximum value; the net effect is to darken the
235 image slightly.
236
237 The better method should be "left bit replication":
238
239 4 3 2 1 0
240 ---------
241 1 1 0 1 1
242
243 7 6 5 4 3 2 1 0
244 ----------------
245 1 1 0 1 1 1 1 0
246 |=======| |===|
247 | leftmost bits repeated to fill open bits
248 |
249 original bits
250 */
/**
 * Unpack 15-bit 5-5-5 pixels into 3 bytes per pixel.
 *
 * Bits 0-4, 5-9 and 10-14 of each 16-bit pixel are written, in that order,
 * as the top 5 bits of three consecutive output bytes; the low 3 bits of
 * every output byte are zero.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination, receives 3 bytes per source pixel
 * @param src_size number of source bytes; src_size/2 pixels are converted
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x3E0)  >> 2;
        out[2] = (px & 0x7C00) >> 7;
    }
}
265
/**
 * Unpack 16-bit 5-6-5 pixels into 3 bytes per pixel.
 *
 * Bits 0-4, 5-10 and 11-15 of each 16-bit pixel are written, in that order,
 * as the top bits of three consecutive output bytes; the remaining low bits
 * of every output byte are zero.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination, receives 3 bytes per source pixel
 * @param src_size number of source bytes; src_size/2 pixels are converted
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = (uint8_t *)dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x7E0)  >> 3;
        out[2] = (px & 0xF800) >> 8;
    }
}
280
/*
 * PACK_RGB32: inline-assembly string fragment that packs four pixels held as
 * 16-bit lanes in three MMX registers into two 64-bit stores of 4-byte
 * pixels (byte order B, G, R, FF in memory) at operand %0 and 8 bytes past it.
 *
 * Register contents expected on entry:
 * mm0 = 00 B3 00 B2 00 B1 00 B0
 * mm1 = 00 G3 00 G2 00 G1 00 G0
 * mm2 = 00 R3 00 R2 00 R1 00 R0
 * mm6 = FF FF FF FF FF FF FF FF
 * mm7 = 00 00 00 00 00 00 00 00
 *
 * MOVNTQ is not defined in the visible part of this file.
 * NOTE(review): none of the C functions visible here reference this macro;
 * presumably it is a leftover of the x86-specific code -- confirm before
 * removing. The final line ends with a continuation backslash, so the blank
 * line that follows is part of the macro and must be kept.
 */
#define PACK_RGB32 \
"packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
"packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
"packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
"punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
"punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
"movq %%mm0, %%mm3 \n\t" \
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
MOVNTQ" %%mm0, %0 \n\t" \
MOVNTQ" %%mm3, 8%0 \n\t" \

/**
 * Unpack 15-bit 5-5-5 pixels into 4 bytes per pixel with a constant 255 in
 * the added byte.
 *
 * Without HAVE_BIGENDIAN the output bytes are, in order: bits 0-4, bits 5-9
 * and bits 10-14 of the source pixel (each as the top 5 bits of a byte),
 * then 255. With HAVE_BIGENDIAN the same four bytes are written in reverse
 * order.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination, receives 4 bytes per source pixel
 * @param src_size number of source bytes; src_size/2 pixels are converted
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 4) {
        const uint16_t px = *in;
#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0x7C00) >> 7;
        out[2] = (px & 0x3E0)  >> 2;
        out[3] = (px & 0x1F)   << 3;
#else
        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x3E0)  >> 2;
        out[2] = (px & 0x7C00) >> 7;
        out[3] = 255;
#endif
    }
}
322
/**
 * Unpack 16-bit 5-6-5 pixels into 4 bytes per pixel with a constant 255 in
 * the added byte.
 *
 * Without HAVE_BIGENDIAN the output bytes are, in order: bits 0-4, bits 5-10
 * and bits 11-15 of the source pixel (each left-aligned in its byte), then
 * 255. With HAVE_BIGENDIAN the same four bytes are written in reverse order.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination, receives 4 bytes per source pixel
 * @param src_size number of source bytes; src_size/2 pixels are converted
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 4) {
        const uint16_t px = *in;
#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0xF800) >> 8;
        out[2] = (px & 0x7E0)  >> 3;
        out[3] = (px & 0x1F)   << 3;
#else
        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x7E0)  >> 3;
        out[2] = (px & 0xF800) >> 8;
        out[3] = 255;
#endif
    }
}
345
/**
 * Exchange two of the four byte lanes in every 32-bit pixel.
 *
 * Each pixel is read as one native 32-bit word. The lanes selected by
 * 0x00ff00ff (bits 0-7 and 16-23) swap places; the lanes selected by
 * 0xff00ff00 (bits 8-15 and 24-31) are left where they are.
 *
 * The arithmetic is done in uint32_t: with a signed int the "<< 16" of the
 * masked value overflows, which is undefined behaviour. The index is a long
 * so that it covers the full range of src_size, and the buffers are indexed
 * from their start so no out-of-range pointers are formed.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination, same size as the source
 * @param src_size number of source bytes; a whole word is processed for
 *                 every started group of 4 bytes
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    long i;

    for (i = 0; i < src_size; i += 4) {
        const uint32_t v       = *(const uint32_t *)&src[i];
        const uint32_t kept    = v & 0xff00ff00;
        const uint32_t swapped = v & 0x00ff00ff;

        /* the bits shifted past bit 31 or below bit 0 simply fall away */
        *(uint32_t *)&dst[i] = (swapped >> 16) + kept + (swapped << 16);
    }
}
357
/**
 * Reverse the byte order inside every 3-byte pixel.
 *
 * Bytes 0 and 2 of each pixel are exchanged and byte 1 is copied. Both outer
 * bytes are read before anything is written, so src and dst may be the same
 * buffer.
 *
 * The loop index is a long, matching src_size: the former unsigned index
 * wrapped for sizes above UINT_MAX and, where long is 32 bits wide, turned a
 * negative size into a huge unsigned bound.
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination, same size as the source
 * @param src_size number of source bytes to convert
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    long i;

    for (i = 0; i < src_size; i += 3) {
        const uint8_t first = src[i + 0];
        const uint8_t third = src[i + 2];

        dst[i + 1] = src[i + 1];
        dst[i + 2] = first;
        dst[i + 0] = third;
    }
}
369
/**
 * Interleave planar Y, U and V into packed 4-byte groups laid out as
 * Y0 U Y1 V (one U and one V sample per two luma samples).
 *
 * The chroma pointers advance by chromStride after every
 * vertLumPerChroma-th luma row (the test uses a bit mask, so the value is
 * expected to be a power of two; the callers visible in this file pass 1
 * or 2).
 *
 * The sample that lands in bits 24-31 is converted to uint32_t before the
 * shift. Shifting the promoted int by 24 overflows for samples >= 128, and
 * in the 64-bit path the resulting negative value was sign-extended into
 * the 64-bit temporary, corrupting the upper half of "k + (l << 32)".
 *
 * NOTE(review): the HAVE_FAST_64BIT path has no HAVE_BIGENDIAN variant and
 * handles two chroma samples per iteration; confirm it is only enabled
 * where that is appropriate.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane
 * @param vsrc             second chroma plane
 * @param dst              packed output, 2 bytes per luma sample
 * @param width            luma samples per row; width/2 groups are written
 * @param height           number of luma rows
 * @param lumStride        byte distance between luma rows
 * @param chromStride      byte distance between chroma rows
 * @param dstStride        byte distance between output rows
 * @param vertLumPerChroma luma rows that share one chroma row
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     long width, long height,
                                     long lumStride, long chromStride,
                                     long dstStride, long vertLumPerChroma)
{
    long y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k, l;

            k = yc[0] + (uc[0] << 8) +
                (yc[1] << 16) + ((uint32_t)vc[0] << 24);
            l = yc[2] + (uc[1] << 8) +
                (yc[3] << 16) + ((uint32_t)vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }

#else
        int i;
        uint32_t *idst = (uint32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t)yc[0] << 24) + (uc[0] << 16) +
                      (yc[1] << 8) + (vc[0] << 0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + ((uint32_t)vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* move to the next chroma row once its luma rows are done */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
419
/**
 * Planar-to-packed conversion in which every chroma row is shared by two
 * luma rows; forwards to yuvPlanartoyuy2_c().
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long dstStride)
{
    /* two luma rows per chroma row */
    const long vertLumPerChroma = 2;

    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      vertLumPerChroma);
}
434
/**
 * Interleave planar Y, U and V into packed 4-byte groups laid out as
 * U Y0 V Y1 (one U and one V sample per two luma samples).
 *
 * The chroma pointers advance by chromStride after every
 * vertLumPerChroma-th luma row (the test uses a bit mask, so the value is
 * expected to be a power of two; the callers visible in this file pass 1
 * or 2).
 *
 * The sample that lands in bits 24-31 is converted to uint32_t before the
 * shift. Shifting the promoted int by 24 overflows for samples >= 128, and
 * in the 64-bit path the resulting negative value was sign-extended into
 * the 64-bit temporary, corrupting the upper half of "k + (l << 32)".
 *
 * NOTE(review): the HAVE_FAST_64BIT path has no HAVE_BIGENDIAN variant and
 * handles two chroma samples per iteration; confirm it is only enabled
 * where that is appropriate.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane
 * @param vsrc             second chroma plane
 * @param dst              packed output, 2 bytes per luma sample
 * @param width            luma samples per row; width/2 groups are written
 * @param height           number of luma rows
 * @param lumStride        byte distance between luma rows
 * @param chromStride      byte distance between chroma rows
 * @param dstStride        byte distance between output rows
 * @param vertLumPerChroma luma rows that share one chroma row
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     long width, long height,
                                     long lumStride, long chromStride,
                                     long dstStride, long vertLumPerChroma)
{
    long y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k, l;

            k = uc[0] + (yc[0] << 8) +
                (vc[0] << 16) + ((uint32_t)yc[1] << 24);
            l = uc[1] + (yc[2] << 8) +
                (vc[1] << 16) + ((uint32_t)yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }

#else
        int i;
        uint32_t *idst = (uint32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t)uc[0] << 24) + (yc[0] << 16) +
                      (vc[0] << 8) + (yc[1] << 0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + ((uint32_t)yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* move to the next chroma row once its luma rows are done */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
484
/**
 * Planar-to-packed conversion in which every chroma row is shared by two
 * luma rows; forwards to yuvPlanartouyvy_c().
 *
 * Height should be a multiple of 2 and width should be a multiple of 16
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long dstStride)
{
    /* two luma rows per chroma row */
    const long vertLumPerChroma = 2;

    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      vertLumPerChroma);
}
499
/**
 * Planar-to-packed conversion with one chroma row per luma row; forwards to
 * yuvPlanartouyvy_c().
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   long width, long height,
                                   long lumStride, long chromStride,
                                   long dstStride)
{
    /* every luma row has its own chroma row */
    const long vertLumPerChroma = 1;

    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      vertLumPerChroma);
}
512
/**
 * Planar-to-packed conversion with one chroma row per luma row; forwards to
 * yuvPlanartoyuy2_c().
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   long width, long height,
                                   long lumStride, long chromStride,
                                   long dstStride)
{
    /* every luma row has its own chroma row */
    const long vertLumPerChroma = 1;

    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      vertLumPerChroma);
}
525
/**
 * Split packed Y0 U Y1 V groups into separate Y, U and V planes.
 *
 * Rows are processed in pairs: the first row of a pair supplies luma and
 * both chroma samples, the second row supplies luma only (its chroma bytes
 * are ignored).
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long srcStride)
{
    const int chromWidth = width >> 1;
    long row;

    for (row = 0; row < height; row += 2) {
        const uint8_t *in = src;
        uint8_t *luma     = ydst;
        long col;

        /* first row of the pair: luma and chroma */
        for (col = 0; col < chromWidth; col++, in += 4, luma += 2) {
            luma[0]   = in[0];
            udst[col] = in[1];
            luma[1]   = in[2];
            vdst[col] = in[3];
        }
        src  += srcStride;
        ydst += lumStride;

        /* second row of the pair: luma only */
        in   = src;
        luma = ydst;
        for (col = 0; col < chromWidth; col++, in += 4, luma += 2) {
            luma[0] = in[0];
            luma[1] = in[2];
        }
        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
559
/**
 * Enlarge one plane to twice its width and height with 3:1 weighted
 * averaging.
 *
 * The first and last output rows are built from a single source row: the
 * edge samples are copied and each interior output sample is
 * (3*near + far) >> 2 of its two horizontal neighbours. Every pair of
 * adjacent source rows produces two output rows in between, each sample
 * being (3*a + b) >> 2 of one sample from each of the two rows (diagonal
 * neighbours for interior columns, vertical neighbours at the edges).
 *
 * @param src       source plane
 * @param dst       destination plane, 2*srcWidth by 2*srcHeight samples
 * @param srcWidth  source width in samples
 * @param srcHeight source height in rows
 * @param srcStride byte distance between source rows
 * @param dstStride byte distance between destination rows
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
                              long srcHeight, long srcStride, long dstStride)
{
    const long last = srcWidth - 1;
    long col, row;

    /* first output row */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        dst[2 * col + 1] = (3 * src[col] +     src[col + 1]) >> 2;
        dst[2 * col + 2] = (    src[col] + 3 * src[col + 1]) >> 2;
    }
    dst[2 * srcWidth - 1] = src[last];

    dst += dstStride;

    /* two output rows between each pair of adjacent source rows */
    for (row = 1; row < srcHeight; row++) {
        const uint8_t *upper = src;
        const uint8_t *lower = src + srcStride;
        uint8_t *out0        = dst;
        uint8_t *out1        = dst + dstStride;

        out0[0] = (3 * upper[0] +     lower[0]) >> 2;
        out1[0] = (    upper[0] + 3 * lower[0]) >> 2;

        for (col = 0; col < last; col++) {
            out0[2 * col + 1] = (3 * upper[col]     +     lower[col + 1]) >> 2;
            out1[2 * col + 2] = (    upper[col]     + 3 * lower[col + 1]) >> 2;
            out1[2 * col + 1] = (    upper[col + 1] + 3 * lower[col]    ) >> 2;
            out0[2 * col + 2] = (3 * upper[col + 1] +     lower[col]    ) >> 2;
        }
        out0[srcWidth * 2 - 1] = (3 * upper[last] +     lower[last]) >> 2;
        out1[srcWidth * 2 - 1] = (    upper[last] + 3 * lower[last]) >> 2;

        dst += dstStride * 2;
        src += srcStride;
    }

    /* last output row, built like the first one */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        dst[2 * col + 1] = (3 * src[col] +     src[col + 1]) >> 2;
        dst[2 * col + 2] = (    src[col] + 3 * src[col + 1]) >> 2;
    }
    dst[2 * srcWidth - 1] = src[last];
}
611
/**
 * Split packed U Y0 V Y1 groups into separate Y, U and V planes.
 *
 * Rows are processed in pairs: the first row of a pair supplies luma and
 * both chroma samples, the second row supplies luma only.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line, others are ignored.
 * FIXME: Write HQ version.
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long srcStride)
{
    const int chromWidth = width >> 1;
    long row;

    for (row = 0; row < height; row += 2) {
        const uint8_t *in = src;
        uint8_t *luma     = ydst;
        long col;

        /* first row of the pair: chroma and luma */
        for (col = 0; col < chromWidth; col++, in += 4, luma += 2) {
            udst[col] = in[0];
            luma[0]   = in[1];
            vdst[col] = in[2];
            luma[1]   = in[3];
        }
        src  += srcStride;
        ydst += lumStride;

        /* second row of the pair: luma only */
        in   = src;
        luma = ydst;
        for (col = 0; col < chromWidth; col++, in += 4, luma += 2) {
            luma[0] = in[1];
            luma[1] = in[3];
        }
        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
647
648 /**
649 * Height should be a multiple of 2 and width should be a multiple of 2.
650 * (If this is a problem for anyone then tell me, and I will fix it.)
651 * Chrominance data is only taken from every second line,
652 * others are ignored in the C version.
653 * FIXME: Write HQ version.
654 */
655 static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
656 uint8_t *udst, uint8_t *vdst,
657 long width, long height,
658 long lumStride, long chromStride,
659 long srcStride)
660 {
661 long y;
662 const int chromWidth = width >> 1;
663 y=0;
664 for (; y<height; y+=2) {
665 long i;
666 for (i=0; i<chromWidth; i++) {
667 unsigned int b = src[6*i+0];
668 unsigned int g = src[6*i+1];
669 unsigned int r = src[6*i+2];
670
671 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
672 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
673 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
674
675 udst[i] = U;
676 vdst[i] = V;
677 ydst[2*i] = Y;
678
679 b = src[6*i+3];
680 g = src[6*i+4];
681 r = src[6*i+5];
682
683 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
684 ydst[2*i+1] = Y;
685 }
686 ydst += lumStride;
687 src += srcStride;
688
689 for (i=0; i<chromWidth; i++) {
690 unsigned int b = src[6*i+0];
691 unsigned int g = src[6*i+1];
692 unsigned int r = src[6*i+2];
693
694 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
695
696 ydst[2*i] = Y;
697
698 b = src[6*i+3];
699 g = src[6*i+4];
700 r = src[6*i+5];
701
702 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
703 ydst[2*i+1] = Y;
704 }
705 udst += chromStride;
706 vdst += chromStride;
707 ydst += lumStride;
708 src += srcStride;
709 }
710 }
711
/**
 * Interleave two byte planes row by row.
 *
 * For every row, output byte 2*w comes from src1[w] and output byte 2*w+1
 * from src2[w].
 *
 * @param src1       plane supplying the even output bytes
 * @param src2       plane supplying the odd output bytes
 * @param dest       destination, 2*width bytes are written per row
 * @param width      bytes taken from each source per row
 * @param height     number of rows
 * @param src1Stride byte distance between rows of src1
 * @param src2Stride byte distance between rows of src2
 * @param dstStride  byte distance between rows of dest
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, long width,
                              long height, long src1Stride,
                              long src2Stride, long dstStride)
{
    long row;

    for (row = 0; row < height; row++) {
        const uint8_t *a = src1;
        const uint8_t *b = src2;
        uint8_t *out     = dest;
        long left;

        for (left = width; left > 0; left--) {
            *out++ = *a++;
            *out++ = *b++;
        }

        src1 += src1Stride;
        src2 += src2Stride;
        dest += dstStride;
    }
}
730
/**
 * Double two planes in both directions by sample repetition.
 *
 * width/2 samples are read per source row and each is written twice
 * horizontally; height/2 output rows are produced and output rows 2k and
 * 2k+1 both read source row k. The first plane is completed before the
 * second is started.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dst1       destination for the first plane
 * @param dst2       destination for the second plane
 * @param width      twice the number of samples read per source row
 * @param height     twice the number of output rows
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param dstStride1 byte distance between rows of dst1
 * @param dstStride2 byte distance between rows of dst2
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 long width, long height,
                                 long srcStride1, long srcStride2,
                                 long dstStride1, long dstStride2)
{
    const long w = width / 2;
    const long h = height / 2;
    int row;
    long col;

    for (row = 0; row < h; row++) {
        const uint8_t *in = src1 + srcStride1 * (row >> 1);
        uint8_t *out      = dst1 + dstStride1 * row;

        for (col = 0; col < w; col++) {
            const uint8_t sample = in[col];

            out[2 * col + 1] = sample;
            out[2 * col]     = sample;
        }
    }

    for (row = 0; row < h; row++) {
        const uint8_t *in = src2 + srcStride2 * (row >> 1);
        uint8_t *out      = dst2 + dstStride2 * row;

        for (col = 0; col < w; col++) {
            const uint8_t sample = in[col];

            out[2 * col + 1] = sample;
            out[2 * col]     = sample;
        }
    }
}
753
/**
 * Interleave a luma plane with two chroma planes that are subsampled 4x in
 * both directions, producing Y U Y V byte groups.
 *
 * Output row y reads chroma row y >> 2. Each of the width/2 iterations per
 * row consumes four luma samples and one sample from each chroma plane, and
 * writes eight bytes: Y0 U Y1 V Y2 U Y3 V.
 *
 * @param src1       luma plane
 * @param src2       plane supplying output bytes 1 and 5 of every group
 * @param src3       plane supplying output bytes 3 and 7 of every group
 * @param dst        packed destination
 * @param width      twice the number of 8-byte groups written per row
 * @param height     number of output rows
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param srcStride3 byte distance between rows of src3
 * @param dstStride  byte distance between rows of dst
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  long width, long height,
                                  long srcStride1, long srcStride2,
                                  long srcStride3, long dstStride)
{
    const long w = width / 2;
    const long h = height;
    long row;

    for (row = 0; row < h; row++) {
        const uint8_t *luma = src1 + srcStride1 * row;
        const uint8_t *cb   = src2 + srcStride2 * (row >> 2);
        const uint8_t *cr   = src3 + srcStride3 * (row >> 2);
        uint8_t *out        = dst  + dstStride  * row;
        int col;

        for (col = 0; col < w; col++, luma += 4, out += 8) {
            out[0] = luma[0];
            out[1] = cb[col];
            out[2] = luma[1];
            out[3] = cr[col];
            out[4] = luma[2];
            out[5] = cb[col];
            out[6] = luma[3];
            out[7] = cr[col];
        }
    }
}
782
/**
 * Copy every second byte of src, starting with byte 0.
 *
 * @param src   source; bytes at even offsets 0 .. 2*count-2 are read
 * @param dst   destination, receives count bytes
 * @param count number of bytes to produce; nothing is done when count <= 0
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int i;

    for (i = 0; i < count; i++)
        dst[i] = src[2 * i];
}
794
/**
 * De-interleave bytes 0 and 2 of every 4-byte group into two outputs.
 *
 * @param src   source, 4 bytes per output element
 * @param dst0  receives byte 0 of every group
 * @param dst1  receives byte 2 of every group
 * @param count number of groups; nothing is done when count <= 0
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = src[4 * i + 0];
        dst1[i] = src[4 * i + 2];
    }
}
808
/**
 * De-interleave bytes 0 and 2 of every 4-byte group, averaging two sources.
 *
 * Each output byte is (a + b) >> 1 of the corresponding bytes of src0 and
 * src1, i.e. the mean rounded down.
 *
 * @param src0  first source, 4 bytes per output element
 * @param src1  second source, same layout
 * @param dst0  receives the averaged byte 0 of every group
 * @param dst1  receives the averaged byte 2 of every group
 * @param count number of groups; nothing is done when count <= 0
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = (src0[4 * i + 0] + src1[4 * i + 0]) >> 1;
        dst1[i] = (src0[4 * i + 2] + src1[4 * i + 2]) >> 1;
    }
}
823
/**
 * De-interleave bytes 1 and 3 of every 4-byte group into two outputs.
 *
 * @param src   source, 4 bytes per output element
 * @param dst0  receives byte 1 of every group
 * @param dst1  receives byte 3 of every group
 * @param count number of groups; nothing is done when count <= 0
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    const uint8_t *odd = src + 1;
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = odd[4 * i + 0];
        dst1[i] = odd[4 * i + 2];
    }
}
838
/**
 * De-interleave bytes 1 and 3 of every 4-byte group, averaging two sources.
 *
 * Each output byte is (a + b) >> 1 of the corresponding bytes of src0 and
 * src1, i.e. the mean rounded down.
 *
 * @param src0  first source, 4 bytes per output element
 * @param src1  second source, same layout
 * @param dst0  receives the averaged byte 1 of every group
 * @param dst1  receives the averaged byte 3 of every group
 * @param count number of groups; nothing is done when count <= 0
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    const uint8_t *odd0 = src0 + 1;
    const uint8_t *odd1 = src1 + 1;
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = (odd0[4 * i + 0] + odd1[4 * i + 0]) >> 1;
        dst1[i] = (odd0[4 * i + 2] + odd1[4 * i + 2]) >> 1;
    }
}
855
/**
 * Split a packed image with luma at even byte offsets into Y, U and V
 * planes, producing one chroma row per two source rows.
 *
 * Luma is extracted from every row with extract_even_c(). On every odd row
 * the chroma bytes of that row and the row above are averaged with
 * extract_odd2avg_c() and the chroma pointers advance; even rows write no
 * chroma. The chroma width is width/2 rounded up.
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width) >> 1);
    long row = 0;

    while (row < height) {
        extract_even_c(src, ydst, width);

        if (row & 1) {
            /* average this row's chroma with the previous row's */
            extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        ydst += lumStride;
        src  += srcStride;
        row++;
    }
}
875
/**
 * Split a packed image with luma at even byte offsets into Y, U and V
 * planes, producing one chroma row per source row.
 *
 * For every row, luma is extracted with extract_even_c() and chroma with
 * extract_odd2_c(). The chroma width is width/2 rounded up.
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width) >> 1);
    long row = 0;

    while (row < height) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);

        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
        src  += srcStride;
        row++;
    }
}
893
/**
 * Split a packed image with luma at odd byte offsets into Y, U and V
 * planes, producing one chroma row per two source rows.
 *
 * Luma is extracted from every row with extract_even_c() applied at
 * src + 1. On every odd row the chroma bytes of that row and the row above
 * are averaged with extract_even2avg_c() and the chroma pointers advance;
 * even rows write no chroma. The chroma width is width/2 rounded up.
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width) >> 1);
    long row = 0;

    while (row < height) {
        extract_even_c(src + 1, ydst, width);

        if (row & 1) {
            /* average this row's chroma with the previous row's */
            extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        ydst += lumStride;
        src  += srcStride;
        row++;
    }
}
913
/**
 * Split a packed image with luma at odd byte offsets into Y, U and V
 * planes, producing one chroma row per source row.
 *
 * For every row, luma is extracted with extract_even_c() applied at src + 1
 * and chroma with extract_even2_c(). The chroma width is width/2 rounded up.
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width) >> 1);
    long row = 0;

    while (row < height) {
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);

        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
        src  += srcStride;
        row++;
    }
}
931
932 static inline void rgb2rgb_init_c(void)
933 {
934 rgb15to16 = rgb15to16_c;
935 rgb15tobgr24 = rgb15tobgr24_c;
936 rgb15to32 = rgb15to32_c;
937 rgb16tobgr24 = rgb16tobgr24_c;
938 rgb16to32 = rgb16to32_c;
939 rgb16to15 = rgb16to15_c;
940 rgb24tobgr16 = rgb24tobgr16_c;
941 rgb24tobgr15 = rgb24tobgr15_c;
942 rgb24tobgr32 = rgb24tobgr32_c;
943 rgb32to16 = rgb32to16_c;
944 rgb32to15 = rgb32to15_c;
945 rgb32tobgr24 = rgb32tobgr24_c;
946 rgb24to15 = rgb24to15_c;
947 rgb24to16 = rgb24to16_c;
948 rgb24tobgr24 = rgb24tobgr24_c;
949 shuffle_bytes_2103 = shuffle_bytes_2103_c;
950 rgb32tobgr16 = rgb32tobgr16_c;
951 rgb32tobgr15 = rgb32tobgr15_c;
952 yv12toyuy2 = yv12toyuy2_c;
953 yv12touyvy = yv12touyvy_c;
954 yuv422ptoyuy2 = yuv422ptoyuy2_c;
955 yuv422ptouyvy = yuv422ptouyvy_c;
956 yuy2toyv12 = yuy2toyv12_c;
957 planar2x = planar2x_c;
958 rgb24toyv12 = rgb24toyv12_c;
959 interleaveBytes = interleaveBytes_c;
960 vu9_to_vu12 = vu9_to_vu12_c;
961 yvu9_to_yuy2 = yvu9_to_yuy2_c;
962
963 uyvytoyuv420 = uyvytoyuv420_c;
964 uyvytoyuv422 = uyvytoyuv422_c;
965 yuyvtoyuv420 = yuyvtoyuv420_c;
966 yuyvtoyuv422 = yuyvtoyuv422_c;
967 }