/*
 *
 * rgb2rgb.c, Software RGB to RGB convertor
 * Written by Nick Kurshev.
 */
6#include <inttypes.h>
7#include "../config.h"
8#include "rgb2rgb.h"
a3aece93
NK
9#include "../mmx_defs.h"
10
e697a141 11#ifdef HAVE_MMX
a3aece93
NK
12static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
13static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
14static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
15static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
16static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
e697a141 17#endif
79811694 18
/*
 * rgb24to32: expand packed 24bpp RGB into 32bpp.
 * Every 3-byte source pixel is copied into a 4-byte destination pixel
 * whose high (filler/alpha) byte is set to 0.
 *
 *  src      packed RGB24 input, src_size bytes (should be a multiple of 3)
 *  dst      output buffer, must hold at least src_size/3*4 bytes
 *  src_size number of input bytes to convert
 */
void rgb24to32(const uint8_t *src,uint8_t *dst,uint32_t src_size)
{
  uint8_t *dest = dst;
  const uint8_t *s = src;
  const uint8_t *end;
#ifdef HAVE_MMX
  uint8_t *mm_end;
#endif
  end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
  /* One MMX iteration reads exactly 12 source bytes (4 pixels) and stores
     exactly 16 destination bytes, so stop after the last complete 12-byte
     source group.  The previous code rounded the source END ADDRESS down
     to a 16-byte boundary, which could leave up to 11 bytes between
     mm_end and end and made the final iteration read past the end of src.
     With an exact bound no mm_end==end pull-back is needed either. */
  mm_end = (uint8_t*)s + ((src_size/12)*12);
  __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
  while(s < mm_end)
  {
	__asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd %1, %%mm0\n\t"
		"movd 3%1, %%mm1\n\t"
		"movd 6%1, %%mm2\n\t"
		"movd 9%1, %%mm3\n\t"
		"punpckldq %%mm1, %%mm0\n\t"
		"punpckldq %%mm3, %%mm2\n\t"
		"pand %%mm7, %%mm0\n\t"
		"pand %%mm7, %%mm2\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		MOVNTQ" %%mm2, 8%0"
		:"=m"(*dest)
		:"m"(*s)
		:"memory");
	dest += 16;
	s += 12;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#endif
  /* scalar tail: converts whatever the MMX loop left over
     (or everything when MMX is not available) */
  while(s < end)
  {
	*dest++ = *s++;
	*dest++ = *s++;
	*dest++ = *s++;
	*dest++ = 0;
  }
}
59ac5a93 64
/*
 * rgb32to24: pack 32bpp RGB down to 24bpp by dropping the high
 * (filler/alpha) byte of every 4-byte source pixel.
 *
 *  src      packed RGB32 input, src_size bytes (should be a multiple of 4)
 *  dst      output buffer, must hold at least src_size/4*3 bytes
 *  src_size number of input bytes to convert
 */
void rgb32to24(const uint8_t *src,uint8_t *dst,uint32_t src_size)
{
  uint8_t *dest = dst;
  const uint8_t *s = src;
  const uint8_t *end;
#ifdef HAVE_MMX
  uint8_t *mm_end;
#endif
  end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
  /* One MMX iteration reads exactly 16 source bytes (4 pixels) but stores
     14 destination bytes (8 at offset 0, 8 at offset 6) while advancing
     dest by only 12: the 2 extra bytes are rewritten by the next
     iteration or by the scalar tail.  Stop after the last complete
     16-byte source group (the old code rounded the end ADDRESS down and
     could read past the end of src), and when that bound equals end keep
     one group for the scalar tail so the final 2-byte overshoot stays
     inside dst. */
  mm_end = (uint8_t*)s + ((src_size/16)*16);
  __asm __volatile(
	"movq %0, %%mm7\n\t"
	"movq %1, %%mm6"
	::"m"(mask24l),"m"(mask24h):"memory");
  if(mm_end == end) mm_end -= MMREG_SIZE*2;
  while(s < mm_end)
  {
	__asm __volatile(
		PREFETCH" 32%1\n\t"
		"movq %1, %%mm0\n\t"
		"movq 8%1, %%mm1\n\t"
		"movq %%mm0, %%mm2\n\t"
		"movq %%mm1, %%mm3\n\t"
		"psrlq $8, %%mm2\n\t"
		"psrlq $8, %%mm3\n\t"
		"pand %%mm7, %%mm0\n\t"
		"pand %%mm7, %%mm1\n\t"
		"pand %%mm6, %%mm2\n\t"
		"pand %%mm6, %%mm3\n\t"
		"por %%mm2, %%mm0\n\t"
		"por %%mm3, %%mm1\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		MOVNTQ" %%mm1, 6%0"
		:"=m"(*dest)
		:"m"(*s)
		:"memory");
	dest += 12;
	s += 16;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#endif
  /* scalar tail: copy 3 bytes, skip the filler byte */
  while(s < end)
  {
	*dest++ = *s++;
	*dest++ = *s++;
	*dest++ = *s++;
	s++;
  }
}

/*
  Original by Strepto/Astral
  ported to gcc & bugfixed : A'rpi
  MMX2, 3DNOW optimization by Nick Kurshev
*/
/*
 * rgb15to16: convert 15bpp pixels (0RRRRRGGGGGBBBBB) to 16bpp
 * (RRRRRGGGGGGBBBBB) by shifting the red and green fields up one bit;
 * the low bit of the 6-bit green field is left 0.
 *
 *  src      input buffer, src_size bytes of 15bpp pixels
 *  dst      output buffer, src_size bytes of 16bpp pixels
 *  src_size number of bytes to convert (the MMX path processes 16 bytes
 *           per step and, as in the original code, assumes src_size is a
 *           multiple of 16 -- TODO confirm against callers)
 */
void rgb15to16(const uint8_t *src,uint8_t *dst,uint32_t src_size)
{
#ifdef HAVE_MMX
  /* walk both buffers from the end via a negative offset so one register
     serves as both loop counter and index; use uint8_t* (the old code
     used plain char*, an incompatible-signedness pointer assignment) */
  register const uint8_t* s=src+src_size;
  register uint8_t* d=dst+src_size;
  register int offs=-(int)src_size;
  __asm __volatile(PREFETCH" %0"::"m"(*(s+offs)):"memory");
  __asm __volatile(
	"movq %0, %%mm4\n\t"
	"movq %1, %%mm5"
	::"m"(mask15b), "m"(mask15rg):"memory");
  while(offs<0)
  {
	__asm __volatile(
		PREFETCH" 32%1\n\t"
		"movq %1, %%mm0\n\t"
		"movq 8%1, %%mm2\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm2, %%mm3\n\t"
		"pand %%mm4, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t"
		"pand %%mm4, %%mm2\n\t"
		"pand %%mm5, %%mm3\n\t"
		"psllq $1, %%mm1\n\t"
		"psllq $1, %%mm3\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm3, %%mm2\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		MOVNTQ" %%mm2, 8%0"
		:"=m"(*(d+offs))
		:"m"(*(s+offs))
		:"memory");
	offs+=16;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#else
  const uint16_t *s1=( const uint16_t * )src;
  uint16_t *d1=( uint16_t * )dst;
  /* the old code assigned a uint8_t* expression to a uint16_t* without a
     cast (a constraint violation, rejected by modern compilers); compute
     the end pointer with explicit casts instead */
  const uint16_t *e=( const uint16_t * )( src + src_size );
  while( s1<e ){
	register int x=*( s1++ );
	/* rrrrrggggggbbbbb
	   0rrrrrgggggbbbbb
	   0111 1111 1110 0000=0x7FE0
	   00000000000001 1111=0x001F */
	*( d1++ )=( x&0x001F )|( ( x&0x7FE0 )<<1 );
  }
#endif
}