Rename "funny" code to "mmx2 filter" code.
libswscale/swscale_template.c
/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * The C code (not assembly, MMX, ...) of this file can be used
 * under the LGPL license.
 */

#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#undef PREFETCHW

#if COMPILE_TEMPLATE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#elif COMPILE_TEMPLATE_MMX2
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#else
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif

#if COMPILE_TEMPLATE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif COMPILE_TEMPLATE_AMD3DNOW
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif

#if COMPILE_TEMPLATE_MMX2
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)

#if COMPILE_TEMPLATE_ALTIVEC
#include "ppc/swscale_altivec_template.c"
#endif

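/* MOVNTQ() expands to a non-temporal store (movntq) on MMX2 builds and to a
 * plain movq everywhere else; the scalers below only ever write the
 * destination rows through it, so a cache-bypassing store is presumably the
 * better choice where the instruction is available. */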
#define YSCALEYUV2YV12X(x, offset, dest, width) \
    __asm__ volatile(\
    "xor %%"REG_a", %%"REG_a" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
    "movq %%mm3, %%mm4 \n\t"\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1: \n\t"\
    "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
    "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
    "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* srcData */\
    "add $16, %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t"\
    "pmulhw %%mm0, %%mm5 \n\t"\
    "paddw %%mm2, %%mm3 \n\t"\
    "paddw %%mm5, %%mm4 \n\t"\
    " jnz 1b \n\t"\
    "psraw $3, %%mm3 \n\t"\
    "psraw $3, %%mm4 \n\t"\
    "packuswb %%mm4, %%mm3 \n\t"\
    MOVNTQ(%%mm3, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "cmp %2, %%"REG_a" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
    "movq %%mm3, %%mm4 \n\t"\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "jb 1b \n\t"\
    :: "r" (&c->redDither),\
       "r" (dest), "g" (width)\
    : "%"REG_a, "%"REG_d, "%"REG_S\
    );

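/* YSCALEYUV2YV12X above multiplies with pmulhw and accumulates in 16 bits;
 * the _ACCURATE variant below interleaves two source lines and uses pmaddwd
 * so the filter sums are kept in 32 bits before rounding (selected at runtime
 * via SWS_ACCURATE_RND). */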
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
    __asm__ volatile(\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "xor %%"REG_a", %%"REG_a" \n\t"\
    "pxor %%mm4, %%mm4 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    ASMALIGN(4) \
    "1: \n\t"\
    "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
    "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
    "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
    "movq %%mm0, %%mm3 \n\t"\
    "punpcklwd %%mm1, %%mm0 \n\t"\
    "punpckhwd %%mm1, %%mm3 \n\t"\
    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
    "pmaddwd %%mm1, %%mm0 \n\t"\
    "pmaddwd %%mm1, %%mm3 \n\t"\
    "paddd %%mm0, %%mm4 \n\t"\
    "paddd %%mm3, %%mm5 \n\t"\
    "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
    "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "punpcklwd %%mm3, %%mm2 \n\t"\
    "punpckhwd %%mm3, %%mm0 \n\t"\
    "pmaddwd %%mm1, %%mm2 \n\t"\
    "pmaddwd %%mm1, %%mm0 \n\t"\
    "paddd %%mm2, %%mm6 \n\t"\
    "paddd %%mm0, %%mm7 \n\t"\
    " jnz 1b \n\t"\
    "psrad $16, %%mm4 \n\t"\
    "psrad $16, %%mm5 \n\t"\
    "psrad $16, %%mm6 \n\t"\
    "psrad $16, %%mm7 \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
    "packssdw %%mm5, %%mm4 \n\t"\
    "packssdw %%mm7, %%mm6 \n\t"\
    "paddw %%mm0, %%mm4 \n\t"\
    "paddw %%mm0, %%mm6 \n\t"\
    "psraw $3, %%mm4 \n\t"\
    "psraw $3, %%mm6 \n\t"\
    "packuswb %%mm6, %%mm4 \n\t"\
    MOVNTQ(%%mm4, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "cmp %2, %%"REG_a" \n\t"\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "pxor %%mm4, %%mm4 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "jb 1b \n\t"\
    :: "r" (&c->redDither),\
       "r" (dest), "g" (width)\
    : "%"REG_a, "%"REG_d, "%"REG_S\
    );

#define YSCALEYUV2YV121 \
    "mov %2, %%"REG_a" \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1: \n\t"\
    "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
    "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
    "psraw $7, %%mm0 \n\t"\
    "psraw $7, %%mm1 \n\t"\
    "packuswb %%mm1, %%mm0 \n\t"\
    MOVNTQ(%%mm0, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "jnc 1b \n\t"

#define YSCALEYUV2YV121_ACCURATE \
    "mov %2, %%"REG_a" \n\t"\
    "pcmpeqw %%mm7, %%mm7 \n\t"\
    "psrlw $15, %%mm7 \n\t"\
    "psllw $6, %%mm7 \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1: \n\t"\
    "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
    "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
    "paddsw %%mm7, %%mm0 \n\t"\
    "paddsw %%mm7, %%mm1 \n\t"\
    "psraw $7, %%mm0 \n\t"\
    "psraw $7, %%mm1 \n\t"\
    "packuswb %%mm1, %%mm0 \n\t"\
    MOVNTQ(%%mm0, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "jnc 1b \n\t"

/*
    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
       "r" (dest), "m" (dstW),
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
df57ab14 194#define YSCALEYUV2PACKEDX_UV \
7ad6469e 195 __asm__ volatile(\
2da0d70d
DB
196 "xor %%"REG_a", %%"REG_a" \n\t"\
197 ASMALIGN(4)\
198 "nop \n\t"\
199 "1: \n\t"\
200 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
201 "mov (%%"REG_d"), %%"REG_S" \n\t"\
202 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
203 "movq %%mm3, %%mm4 \n\t"\
204 ASMALIGN(4)\
205 "2: \n\t"\
206 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
207 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
8b2fce0d 208 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
2da0d70d
DB
209 "add $16, %%"REG_d" \n\t"\
210 "mov (%%"REG_d"), %%"REG_S" \n\t"\
211 "pmulhw %%mm0, %%mm2 \n\t"\
212 "pmulhw %%mm0, %%mm5 \n\t"\
213 "paddw %%mm2, %%mm3 \n\t"\
214 "paddw %%mm5, %%mm4 \n\t"\
215 "test %%"REG_S", %%"REG_S" \n\t"\
216 " jnz 2b \n\t"\
df57ab14 217
fe91924d 218#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
df57ab14 219 "lea "offset"(%0), %%"REG_d" \n\t"\
2da0d70d 220 "mov (%%"REG_d"), %%"REG_S" \n\t"\
fe91924d
CS
221 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
222 "movq "#dst1", "#dst2" \n\t"\
2da0d70d
DB
223 ASMALIGN(4)\
224 "2: \n\t"\
fe91924d
CS
225 "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\
226 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\
227 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\
2da0d70d
DB
228 "add $16, %%"REG_d" \n\t"\
229 "mov (%%"REG_d"), %%"REG_S" \n\t"\
fe91924d
CS
230 "pmulhw "#coeff", "#src1" \n\t"\
231 "pmulhw "#coeff", "#src2" \n\t"\
232 "paddw "#src1", "#dst1" \n\t"\
233 "paddw "#src2", "#dst2" \n\t"\
2da0d70d
DB
234 "test %%"REG_S", %%"REG_S" \n\t"\
235 " jnz 2b \n\t"\
236
df57ab14
CS
237#define YSCALEYUV2PACKEDX \
238 YSCALEYUV2PACKEDX_UV \
fe91924d 239 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
df57ab14 240
2da0d70d
DB
241#define YSCALEYUV2PACKEDX_END \
242 :: "r" (&c->redDither), \
243 "m" (dummy), "m" (dummy), "m" (dummy),\
244 "r" (dest), "m" (dstW) \
245 : "%"REG_a, "%"REG_d, "%"REG_S \
246 );
8422aa88 247
df57ab14 248#define YSCALEYUV2PACKEDX_ACCURATE_UV \
7ad6469e 249 __asm__ volatile(\
2da0d70d
DB
250 "xor %%"REG_a", %%"REG_a" \n\t"\
251 ASMALIGN(4)\
252 "nop \n\t"\
253 "1: \n\t"\
254 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
255 "mov (%%"REG_d"), %%"REG_S" \n\t"\
256 "pxor %%mm4, %%mm4 \n\t"\
257 "pxor %%mm5, %%mm5 \n\t"\
258 "pxor %%mm6, %%mm6 \n\t"\
259 "pxor %%mm7, %%mm7 \n\t"\
260 ASMALIGN(4)\
261 "2: \n\t"\
262 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
8b2fce0d 263 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
1625216e 264 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
2da0d70d
DB
265 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
266 "movq %%mm0, %%mm3 \n\t"\
267 "punpcklwd %%mm1, %%mm0 \n\t"\
268 "punpckhwd %%mm1, %%mm3 \n\t"\
1625216e 269 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
2da0d70d
DB
270 "pmaddwd %%mm1, %%mm0 \n\t"\
271 "pmaddwd %%mm1, %%mm3 \n\t"\
272 "paddd %%mm0, %%mm4 \n\t"\
273 "paddd %%mm3, %%mm5 \n\t"\
8b2fce0d 274 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
1625216e
MN
275 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
276 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
2da0d70d
DB
277 "test %%"REG_S", %%"REG_S" \n\t"\
278 "movq %%mm2, %%mm0 \n\t"\
279 "punpcklwd %%mm3, %%mm2 \n\t"\
280 "punpckhwd %%mm3, %%mm0 \n\t"\
281 "pmaddwd %%mm1, %%mm2 \n\t"\
282 "pmaddwd %%mm1, %%mm0 \n\t"\
283 "paddd %%mm2, %%mm6 \n\t"\
284 "paddd %%mm0, %%mm7 \n\t"\
285 " jnz 2b \n\t"\
286 "psrad $16, %%mm4 \n\t"\
287 "psrad $16, %%mm5 \n\t"\
288 "psrad $16, %%mm6 \n\t"\
289 "psrad $16, %%mm7 \n\t"\
290 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
291 "packssdw %%mm5, %%mm4 \n\t"\
292 "packssdw %%mm7, %%mm6 \n\t"\
293 "paddw %%mm0, %%mm4 \n\t"\
294 "paddw %%mm0, %%mm6 \n\t"\
295 "movq %%mm4, "U_TEMP"(%0) \n\t"\
296 "movq %%mm6, "V_TEMP"(%0) \n\t"\
df57ab14
CS
297
298#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
299 "lea "offset"(%0), %%"REG_d" \n\t"\
2da0d70d
DB
300 "mov (%%"REG_d"), %%"REG_S" \n\t"\
301 "pxor %%mm1, %%mm1 \n\t"\
302 "pxor %%mm5, %%mm5 \n\t"\
303 "pxor %%mm7, %%mm7 \n\t"\
304 "pxor %%mm6, %%mm6 \n\t"\
305 ASMALIGN(4)\
306 "2: \n\t"\
307 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
308 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
1625216e 309 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
2da0d70d
DB
310 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
311 "movq %%mm0, %%mm3 \n\t"\
312 "punpcklwd %%mm4, %%mm0 \n\t"\
313 "punpckhwd %%mm4, %%mm3 \n\t"\
1625216e 314 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
2da0d70d
DB
315 "pmaddwd %%mm4, %%mm0 \n\t"\
316 "pmaddwd %%mm4, %%mm3 \n\t"\
317 "paddd %%mm0, %%mm1 \n\t"\
318 "paddd %%mm3, %%mm5 \n\t"\
319 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
1625216e
MN
320 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
321 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
2da0d70d
DB
322 "test %%"REG_S", %%"REG_S" \n\t"\
323 "movq %%mm2, %%mm0 \n\t"\
324 "punpcklwd %%mm3, %%mm2 \n\t"\
325 "punpckhwd %%mm3, %%mm0 \n\t"\
326 "pmaddwd %%mm4, %%mm2 \n\t"\
327 "pmaddwd %%mm4, %%mm0 \n\t"\
328 "paddd %%mm2, %%mm7 \n\t"\
329 "paddd %%mm0, %%mm6 \n\t"\
330 " jnz 2b \n\t"\
331 "psrad $16, %%mm1 \n\t"\
332 "psrad $16, %%mm5 \n\t"\
333 "psrad $16, %%mm7 \n\t"\
334 "psrad $16, %%mm6 \n\t"\
335 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
336 "packssdw %%mm5, %%mm1 \n\t"\
337 "packssdw %%mm6, %%mm7 \n\t"\
338 "paddw %%mm0, %%mm1 \n\t"\
339 "paddw %%mm0, %%mm7 \n\t"\
340 "movq "U_TEMP"(%0), %%mm3 \n\t"\
341 "movq "V_TEMP"(%0), %%mm4 \n\t"\
bca11e75 342
df57ab14
CS
343#define YSCALEYUV2PACKEDX_ACCURATE \
344 YSCALEYUV2PACKEDX_ACCURATE_UV \
345 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
346
8422aa88 347#define YSCALEYUV2RGBX \
2da0d70d
DB
348 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
349 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
350 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
351 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
352 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
353 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
354/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
355 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
356 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
357 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
358 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
359 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
360 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
361/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
362 "paddw %%mm3, %%mm4 \n\t"\
363 "movq %%mm2, %%mm0 \n\t"\
364 "movq %%mm5, %%mm6 \n\t"\
365 "movq %%mm4, %%mm3 \n\t"\
366 "punpcklwd %%mm2, %%mm2 \n\t"\
367 "punpcklwd %%mm5, %%mm5 \n\t"\
368 "punpcklwd %%mm4, %%mm4 \n\t"\
369 "paddw %%mm1, %%mm2 \n\t"\
370 "paddw %%mm1, %%mm5 \n\t"\
371 "paddw %%mm1, %%mm4 \n\t"\
372 "punpckhwd %%mm0, %%mm0 \n\t"\
373 "punpckhwd %%mm6, %%mm6 \n\t"\
374 "punpckhwd %%mm3, %%mm3 \n\t"\
375 "paddw %%mm7, %%mm0 \n\t"\
376 "paddw %%mm7, %%mm6 \n\t"\
377 "paddw %%mm7, %%mm3 \n\t"\
378 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
379 "packuswb %%mm0, %%mm2 \n\t"\
380 "packuswb %%mm6, %%mm5 \n\t"\
381 "packuswb %%mm3, %%mm4 \n\t"\
d604bab9 382
6e1c66bc 383#define REAL_YSCALEYUV2PACKED(index, c) \
2da0d70d
DB
384 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
385 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
386 "psraw $3, %%mm0 \n\t"\
387 "psraw $3, %%mm1 \n\t"\
388 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
389 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
390 "xor "#index", "#index" \n\t"\
391 ASMALIGN(4)\
392 "1: \n\t"\
393 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
394 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
395 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
396 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
397 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
398 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
399 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
400 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
401 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
402 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
403 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
404 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
405 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
406 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
407 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
408 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
409 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
410 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
411 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
412 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
413 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
414 "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
415 "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
416 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
417 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
6a4970ab 418
6e1c66bc 419#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
6a4970ab 420
df57ab14 421#define REAL_YSCALEYUV2RGB_UV(index, c) \
2da0d70d
DB
422 "xor "#index", "#index" \n\t"\
423 ASMALIGN(4)\
424 "1: \n\t"\
425 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
426 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
427 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
428 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
429 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
430 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
431 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
432 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
433 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
434 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
435 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
436 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
437 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
438 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
439 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
440 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
441 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
442 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
443 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
444 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
df57ab14 445
786dcfef
CS
446#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
447 "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
448 "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
449 "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
450 "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
2da0d70d
DB
451 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
452 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
453 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
454 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
455 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
456 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
457 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
458 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
df57ab14
CS
459
460#define REAL_YSCALEYUV2RGB_COEFF(c) \
2da0d70d
DB
461 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
462 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
463 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
464 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
465 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
466 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
467 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
468 "paddw %%mm3, %%mm4 \n\t"\
469 "movq %%mm2, %%mm0 \n\t"\
470 "movq %%mm5, %%mm6 \n\t"\
471 "movq %%mm4, %%mm3 \n\t"\
472 "punpcklwd %%mm2, %%mm2 \n\t"\
473 "punpcklwd %%mm5, %%mm5 \n\t"\
474 "punpcklwd %%mm4, %%mm4 \n\t"\
475 "paddw %%mm1, %%mm2 \n\t"\
476 "paddw %%mm1, %%mm5 \n\t"\
477 "paddw %%mm1, %%mm4 \n\t"\
478 "punpckhwd %%mm0, %%mm0 \n\t"\
479 "punpckhwd %%mm6, %%mm6 \n\t"\
480 "punpckhwd %%mm3, %%mm3 \n\t"\
481 "paddw %%mm7, %%mm0 \n\t"\
482 "paddw %%mm7, %%mm6 \n\t"\
483 "paddw %%mm7, %%mm3 \n\t"\
484 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
485 "packuswb %%mm0, %%mm2 \n\t"\
486 "packuswb %%mm6, %%mm5 \n\t"\
487 "packuswb %%mm3, %%mm4 \n\t"\
40494418 488
786dcfef 489#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
df57ab14
CS
490
491#define YSCALEYUV2RGB(index, c) \
492 REAL_YSCALEYUV2RGB_UV(index, c) \
786dcfef 493 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
df57ab14 494 REAL_YSCALEYUV2RGB_COEFF(c)
6a4970ab 495
6e1c66bc 496#define REAL_YSCALEYUV2PACKED1(index, c) \
2da0d70d
DB
497 "xor "#index", "#index" \n\t"\
498 ASMALIGN(4)\
499 "1: \n\t"\
500 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8b2fce0d 501 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
2da0d70d
DB
502 "psraw $7, %%mm3 \n\t" \
503 "psraw $7, %%mm4 \n\t" \
504 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
505 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
506 "psraw $7, %%mm1 \n\t" \
507 "psraw $7, %%mm7 \n\t" \
6a4970ab 508
6e1c66bc 509#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
6a4970ab 510
6e1c66bc 511#define REAL_YSCALEYUV2RGB1(index, c) \
2da0d70d
DB
512 "xor "#index", "#index" \n\t"\
513 ASMALIGN(4)\
514 "1: \n\t"\
515 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8b2fce0d 516 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
2da0d70d
DB
517 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
518 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
519 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
520 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
521 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
522 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
523 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
524 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
525 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
526 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
527 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
528 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
529 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
530 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
531 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
532 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
533 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
534 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
535 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
536 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
537 "paddw %%mm3, %%mm4 \n\t"\
538 "movq %%mm2, %%mm0 \n\t"\
539 "movq %%mm5, %%mm6 \n\t"\
540 "movq %%mm4, %%mm3 \n\t"\
541 "punpcklwd %%mm2, %%mm2 \n\t"\
542 "punpcklwd %%mm5, %%mm5 \n\t"\
543 "punpcklwd %%mm4, %%mm4 \n\t"\
544 "paddw %%mm1, %%mm2 \n\t"\
545 "paddw %%mm1, %%mm5 \n\t"\
546 "paddw %%mm1, %%mm4 \n\t"\
547 "punpckhwd %%mm0, %%mm0 \n\t"\
548 "punpckhwd %%mm6, %%mm6 \n\t"\
549 "punpckhwd %%mm3, %%mm3 \n\t"\
550 "paddw %%mm7, %%mm0 \n\t"\
551 "paddw %%mm7, %%mm6 \n\t"\
552 "paddw %%mm7, %%mm3 \n\t"\
553 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
554 "packuswb %%mm0, %%mm2 \n\t"\
555 "packuswb %%mm6, %%mm5 \n\t"\
556 "packuswb %%mm3, %%mm4 \n\t"\
40494418 557
6e1c66bc 558#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
497d4f99 559
6e1c66bc 560#define REAL_YSCALEYUV2PACKED1b(index, c) \
2da0d70d
DB
561 "xor "#index", "#index" \n\t"\
562 ASMALIGN(4)\
563 "1: \n\t"\
564 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
565 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
566 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
567 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
568 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
569 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
570 "psrlw $8, %%mm3 \n\t" \
571 "psrlw $8, %%mm4 \n\t" \
572 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
573 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
574 "psraw $7, %%mm1 \n\t" \
575 "psraw $7, %%mm7 \n\t"
6e1c66bc 576#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
6a4970ab 577
// do vertical chrominance interpolation
6e1c66bc 579#define REAL_YSCALEYUV2RGB1b(index, c) \
2da0d70d
DB
580 "xor "#index", "#index" \n\t"\
581 ASMALIGN(4)\
582 "1: \n\t"\
583 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
584 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
585 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
586 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
587 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
588 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
589 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
590 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
591 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
592 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
593 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
594 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
595 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
596 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
597 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
598 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
599 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
600 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
601 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
602 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
603 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
604 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
605 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
606 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
607 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
608 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
609 "paddw %%mm3, %%mm4 \n\t"\
610 "movq %%mm2, %%mm0 \n\t"\
611 "movq %%mm5, %%mm6 \n\t"\
612 "movq %%mm4, %%mm3 \n\t"\
613 "punpcklwd %%mm2, %%mm2 \n\t"\
614 "punpcklwd %%mm5, %%mm5 \n\t"\
615 "punpcklwd %%mm4, %%mm4 \n\t"\
616 "paddw %%mm1, %%mm2 \n\t"\
617 "paddw %%mm1, %%mm5 \n\t"\
618 "paddw %%mm1, %%mm4 \n\t"\
619 "punpckhwd %%mm0, %%mm0 \n\t"\
620 "punpckhwd %%mm6, %%mm6 \n\t"\
621 "punpckhwd %%mm3, %%mm3 \n\t"\
622 "paddw %%mm7, %%mm0 \n\t"\
623 "paddw %%mm7, %%mm6 \n\t"\
624 "paddw %%mm7, %%mm3 \n\t"\
625 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
626 "packuswb %%mm0, %%mm2 \n\t"\
627 "packuswb %%mm6, %%mm5 \n\t"\
628 "packuswb %%mm3, %%mm4 \n\t"\
40494418 629
6e1c66bc 630#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
d604bab9 631
6858492e
CS
632#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
633 "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
634 "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
635 "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
636 "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
637 "packuswb %%mm1, %%mm7 \n\t"
638#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
639
9c77b26b
CS
640#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
641 "movq "#b", "#q2" \n\t" /* B */\
642 "movq "#r", "#t" \n\t" /* R */\
643 "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
644 "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
645 "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
646 "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
647 "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
648 "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
649 "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
650 "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
651 "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
652 "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
d604bab9 653\
9c77b26b
CS
654 MOVNTQ( q0, (dst, index, 4))\
655 MOVNTQ( b, 8(dst, index, 4))\
656 MOVNTQ( q2, 16(dst, index, 4))\
657 MOVNTQ( q3, 24(dst, index, 4))\
d604bab9 658\
2da0d70d
DB
659 "add $8, "#index" \n\t"\
660 "cmp "#dstw", "#index" \n\t"\
661 " jb 1b \n\t"
9c77b26b 662#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
d604bab9 663
27a90b04 664#define REAL_WRITERGB16(dst, dstw, index) \
2da0d70d
DB
665 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
666 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
667 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
668 "psrlq $3, %%mm2 \n\t"\
d604bab9 669\
2da0d70d
DB
670 "movq %%mm2, %%mm1 \n\t"\
671 "movq %%mm4, %%mm3 \n\t"\
d604bab9 672\
2da0d70d
DB
673 "punpcklbw %%mm7, %%mm3 \n\t"\
674 "punpcklbw %%mm5, %%mm2 \n\t"\
675 "punpckhbw %%mm7, %%mm4 \n\t"\
676 "punpckhbw %%mm5, %%mm1 \n\t"\
d604bab9 677\
2da0d70d
DB
678 "psllq $3, %%mm3 \n\t"\
679 "psllq $3, %%mm4 \n\t"\
d604bab9 680\
2da0d70d
DB
681 "por %%mm3, %%mm2 \n\t"\
682 "por %%mm4, %%mm1 \n\t"\
d604bab9 683\
2da0d70d
DB
684 MOVNTQ(%%mm2, (dst, index, 2))\
685 MOVNTQ(%%mm1, 8(dst, index, 2))\
d604bab9 686\
2da0d70d
DB
687 "add $8, "#index" \n\t"\
688 "cmp "#dstw", "#index" \n\t"\
689 " jb 1b \n\t"
27a90b04 690#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
d604bab9 691
27a90b04 692#define REAL_WRITERGB15(dst, dstw, index) \
2da0d70d
DB
693 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
694 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
695 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
696 "psrlq $3, %%mm2 \n\t"\
697 "psrlq $1, %%mm5 \n\t"\
d604bab9 698\
2da0d70d
DB
699 "movq %%mm2, %%mm1 \n\t"\
700 "movq %%mm4, %%mm3 \n\t"\
d604bab9 701\
2da0d70d
DB
702 "punpcklbw %%mm7, %%mm3 \n\t"\
703 "punpcklbw %%mm5, %%mm2 \n\t"\
704 "punpckhbw %%mm7, %%mm4 \n\t"\
705 "punpckhbw %%mm5, %%mm1 \n\t"\
d604bab9 706\
2da0d70d
DB
707 "psllq $2, %%mm3 \n\t"\
708 "psllq $2, %%mm4 \n\t"\
d604bab9 709\
2da0d70d
DB
710 "por %%mm3, %%mm2 \n\t"\
711 "por %%mm4, %%mm1 \n\t"\
d604bab9 712\
2da0d70d
DB
713 MOVNTQ(%%mm2, (dst, index, 2))\
714 MOVNTQ(%%mm1, 8(dst, index, 2))\
d604bab9 715\
2da0d70d
DB
716 "add $8, "#index" \n\t"\
717 "cmp "#dstw", "#index" \n\t"\
718 " jb 1b \n\t"
27a90b04 719#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
f62255fb 720
6542b44e 721#define WRITEBGR24OLD(dst, dstw, index) \
2da0d70d
DB
722 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
723 "movq %%mm2, %%mm1 \n\t" /* B */\
724 "movq %%mm5, %%mm6 \n\t" /* R */\
725 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
726 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
727 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
728 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
729 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
730 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
731 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
732 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
733 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
734 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
d604bab9 735\
2da0d70d
DB
736 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
737 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
738 "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 0 */\
739 "pand "MANGLE(bm11111000)", %%mm0 \n\t" /* 00RGB000 0.5 */\
740 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
741 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
742 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
743 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
d604bab9 744\
2da0d70d
DB
745 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
746 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
747 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
748 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
749 "pand "MANGLE(bm00001111)", %%mm2 \n\t" /* 0000RGBR 1 */\
750 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
751 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
752 "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 2 */\
753 "pand "MANGLE(bm11111000)", %%mm1 \n\t" /* 00RGB000 2.5 */\
754 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
755 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
756 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
757 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
d604bab9 758\
2da0d70d
DB
759 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
760 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
761 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
762 "pand "MANGLE(bm00000111)", %%mm5 \n\t" /* 00000RGB 3 */\
763 "pand "MANGLE(bm11111000)", %%mm3 \n\t" /* 00RGB000 3.5 */\
764 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
765 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
766 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
d604bab9 767\
2da0d70d
DB
768 MOVNTQ(%%mm0, (dst))\
769 MOVNTQ(%%mm2, 8(dst))\
770 MOVNTQ(%%mm3, 16(dst))\
771 "add $24, "#dst" \n\t"\
d604bab9 772\
2da0d70d
DB
773 "add $8, "#index" \n\t"\
774 "cmp "#dstw", "#index" \n\t"\
775 " jb 1b \n\t"
d604bab9 776
6542b44e 777#define WRITEBGR24MMX(dst, dstw, index) \
2da0d70d
DB
778 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
779 "movq %%mm2, %%mm1 \n\t" /* B */\
780 "movq %%mm5, %%mm6 \n\t" /* R */\
781 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
782 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
783 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
784 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
785 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
786 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
787 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
788 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
789 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
790 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
99d2cb72 791\
2da0d70d
DB
792 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
793 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
794 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
795 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
99d2cb72 796\
2da0d70d
DB
797 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
798 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
799 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
800 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
99d2cb72 801\
2da0d70d
DB
802 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
803 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
804 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
805 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
99d2cb72 806\
2da0d70d
DB
807 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
808 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
809 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
810 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
811 MOVNTQ(%%mm0, (dst))\
99d2cb72 812\
2da0d70d
DB
813 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
814 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
815 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
816 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
817 MOVNTQ(%%mm6, 8(dst))\
99d2cb72 818\
2da0d70d
DB
819 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
820 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
821 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
822 MOVNTQ(%%mm5, 16(dst))\
99d2cb72 823\
2da0d70d 824 "add $24, "#dst" \n\t"\
99d2cb72 825\
2da0d70d
DB
826 "add $8, "#index" \n\t"\
827 "cmp "#dstw", "#index" \n\t"\
828 " jb 1b \n\t"
99d2cb72 829
6542b44e 830#define WRITEBGR24MMX2(dst, dstw, index) \
2da0d70d 831 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
5802683a
RD
832 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
833 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
2da0d70d
DB
834 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
835 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
836 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
99d2cb72 837\
2da0d70d
DB
838 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
839 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
840 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
99d2cb72 841\
2da0d70d
DB
842 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
843 "por %%mm1, %%mm6 \n\t"\
844 "por %%mm3, %%mm6 \n\t"\
845 MOVNTQ(%%mm6, (dst))\
99d2cb72 846\
2da0d70d
DB
847 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
848 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
849 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
850 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
99d2cb72 851\
5802683a 852 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
2da0d70d
DB
853 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
854 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
99d2cb72 855\
2da0d70d
DB
856 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
857 "por %%mm3, %%mm6 \n\t"\
858 MOVNTQ(%%mm6, 8(dst))\
99d2cb72 859\
2da0d70d
DB
860 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\
861 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
862 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
99d2cb72 863\
2da0d70d
DB
864 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
865 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
5802683a 866 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
99d2cb72 867\
2da0d70d
DB
868 "por %%mm1, %%mm3 \n\t"\
869 "por %%mm3, %%mm6 \n\t"\
870 MOVNTQ(%%mm6, 16(dst))\
99d2cb72 871\
2da0d70d 872 "add $24, "#dst" \n\t"\
99d2cb72 873\
2da0d70d
DB
874 "add $8, "#index" \n\t"\
875 "cmp "#dstw", "#index" \n\t"\
876 " jb 1b \n\t"
99d2cb72 877
94daf2e9 878#if COMPILE_TEMPLATE_MMX2
7630f2e0 879#undef WRITEBGR24
6e1c66bc 880#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
99d2cb72 881#else
7630f2e0 882#undef WRITEBGR24
6e1c66bc 883#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
99d2cb72
MN
884#endif
885
6e1c66bc 886#define REAL_WRITEYUY2(dst, dstw, index) \
2da0d70d
DB
887 "packuswb %%mm3, %%mm3 \n\t"\
888 "packuswb %%mm4, %%mm4 \n\t"\
889 "packuswb %%mm7, %%mm1 \n\t"\
890 "punpcklbw %%mm4, %%mm3 \n\t"\
891 "movq %%mm1, %%mm7 \n\t"\
892 "punpcklbw %%mm3, %%mm1 \n\t"\
893 "punpckhbw %%mm3, %%mm7 \n\t"\
25593e29 894\
2da0d70d
DB
895 MOVNTQ(%%mm1, (dst, index, 2))\
896 MOVNTQ(%%mm7, 8(dst, index, 2))\
25593e29 897\
2da0d70d
DB
898 "add $8, "#index" \n\t"\
899 "cmp "#dstw", "#index" \n\t"\
900 " jb 1b \n\t"
6e1c66bc 901#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
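/* Vertical scaling to planar YV12 (plus optional alpha) output: lumFilter and
 * chrFilter are applied across lumFilterSize/chrFilterSize input lines and the
 * results are written to the Y, U, V and alpha planes. The MMX macros above
 * are used unless SWS_BITEXACT is set; otherwise the AltiVec or C
 * implementation handles the job. */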
static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
94daf2e9 908#if COMPILE_TEMPLATE_MMX
f433c8ab 909 if(!(c->flags & SWS_BITEXACT)){
14014d47
MN
910 if (c->flags & SWS_ACCURATE_RND){
911 if (uDest){
912 YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
913 YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
914 }
6858492e
CS
915 if (CONFIG_SWSCALE_ALPHA && aDest){
916 YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
917 }
bca11e75 918
14014d47
MN
919 YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
920 }else{
921 if (uDest){
922 YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
923 YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
924 }
6858492e
CS
925 if (CONFIG_SWSCALE_ALPHA && aDest){
926 YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
927 }
2da0d70d 928
14014d47
MN
929 YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
930 }
f433c8ab
MN
931 return;
932 }
933#endif
94daf2e9 934#if COMPILE_TEMPLATE_ALTIVEC
a2faa401 935yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
2da0d70d
DB
936 chrFilter, chrSrc, chrFilterSize,
937 dest, uDest, vDest, dstW, chrDstW);
94daf2e9 938#else //COMPILE_TEMPLATE_ALTIVEC
5859233b 939yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
2da0d70d 940 chrFilter, chrSrc, chrFilterSize,
6858492e 941 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
94daf2e9 942#endif //!COMPILE_TEMPLATE_ALTIVEC
c1b0bfb4 943}
2add307d 944
7ac40327
RP
945static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
946 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
2da0d70d 947 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
6118e52e
VS
948{
949yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
2da0d70d
DB
950 chrFilter, chrSrc, chrFilterSize,
951 dest, uDest, dstW, chrDstW, dstFormat);
6118e52e
VS
952}
953
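/* Unscaled vertical case: each output line comes from exactly one input line,
 * so the samples only need rounding from the 16-bit intermediate format back
 * to 8 bits, with clipping; no filtering happens here. */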
static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
    int i;
94daf2e9 958#if COMPILE_TEMPLATE_MMX
f433c8ab 959 if(!(c->flags & SWS_BITEXACT)){
6858492e
CS
960 long p= 4;
961 uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
962 uint8_t *dst[4]= {aDest, dest, uDest, vDest};
963 x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
2da0d70d 964
14014d47
MN
965 if (c->flags & SWS_ACCURATE_RND){
966 while(p--){
3164d25e
CS
967 if (dst[p]){
968 __asm__ volatile(
969 YSCALEYUV2YV121_ACCURATE
970 :: "r" (src[p]), "r" (dst[p] + counter[p]),
971 "g" (-counter[p])
972 : "%"REG_a
973 );
974 }
6858492e 975 }
14014d47
MN
976 }else{
977 while(p--){
3164d25e
CS
978 if (dst[p]){
979 __asm__ volatile(
980 YSCALEYUV2YV121
981 :: "r" (src[p]), "r" (dst[p] + counter[p]),
982 "g" (-counter[p])
983 : "%"REG_a
984 );
985 }
6858492e 986 }
d78c1ea1 987 }
        return;
    }
#endif
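    /* C fallback: the intermediate samples carry 7 fractional bits, so adding
     * 64 and shifting right by 7 rounds to the nearest 8-bit value; the &256
     * test flags out-of-range results (negative or above 255) so they can be
     * clipped. */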
    for (i=0; i<dstW; i++)
    {
        int val= (lumSrc[i]+64)>>7;

        if (val&256){
            if (val<0) val=0;
            else       val=255;
        }

        dest[i]= val;
    }

    if (uDest)
        for (i=0; i<chrDstW; i++)
        {
            int u=(chrSrc[i       ]+64)>>7;
            int v=(chrSrc[i + VOFW]+64)>>7;

            if ((u|v)&256){
                if (u<0)        u=0;
                else if (u>255) u=255;
                if (v<0)        v=0;
                else if (v>255) v=255;
            }

            uDest[i]= u;
            vDest[i]= v;
        }

    if (CONFIG_SWSCALE_ALPHA && aDest)
        for (i=0; i<dstW; i++){
            int val= (alpSrc[i]+64)>>7;
            aDest[i]= av_clip_uint8(val);
        }
}
1026
c1b0bfb4 1027
/**
 * vertical scale YV12 to RGB (and other packed output formats)
 */
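/* Packed formats with MMX fast paths here: RGB32, BGR24, RGB555, RGB565 and
 * YUYV422; anything else, or SWS_BITEXACT operation, is handled by the
 * AltiVec or C fallback at the end of the function. */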
static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
{
94daf2e9 1035#if COMPILE_TEMPLATE_MMX
d0ce212a 1036 x86_reg dummy=0;
f433c8ab 1037 if(!(c->flags & SWS_BITEXACT)){
14014d47
MN
1038 if (c->flags & SWS_ACCURATE_RND){
1039 switch(c->dstFormat){
1040 case PIX_FMT_RGB32:
6858492e
CS
1041 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1042 YSCALEYUV2PACKEDX_ACCURATE
1043 YSCALEYUV2RGBX
1044 "movq %%mm2, "U_TEMP"(%0) \n\t"
1045 "movq %%mm4, "V_TEMP"(%0) \n\t"
1046 "movq %%mm5, "Y_TEMP"(%0) \n\t"
1047 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
1048 "movq "Y_TEMP"(%0), %%mm5 \n\t"
1049 "psraw $3, %%mm1 \n\t"
1050 "psraw $3, %%mm7 \n\t"
1051 "packuswb %%mm7, %%mm1 \n\t"
1052 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
1053
1054 YSCALEYUV2PACKEDX_END
1055 }else{
3164d25e
CS
1056 YSCALEYUV2PACKEDX_ACCURATE
1057 YSCALEYUV2RGBX
1058 "pcmpeqd %%mm7, %%mm7 \n\t"
1059 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
2da0d70d 1060
3164d25e 1061 YSCALEYUV2PACKEDX_END
6858492e 1062 }
14014d47
MN
1063 return;
1064 case PIX_FMT_BGR24:
1065 YSCALEYUV2PACKEDX_ACCURATE
1066 YSCALEYUV2RGBX
40494418 1067 "pxor %%mm7, %%mm7 \n\t"
14014d47
MN
1068 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1069 "add %4, %%"REG_c" \n\t"
1070 WRITEBGR24(%%REGc, %5, %%REGa)
2da0d70d
DB
1071
1072
14014d47
MN
1073 :: "r" (&c->redDither),
1074 "m" (dummy), "m" (dummy), "m" (dummy),
1075 "r" (dest), "m" (dstW)
1076 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1077 );
1078 return;
1079 case PIX_FMT_RGB555:
1080 YSCALEYUV2PACKEDX_ACCURATE
1081 YSCALEYUV2RGBX
40494418 1082 "pxor %%mm7, %%mm7 \n\t"
14014d47 1083 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bca11e75 1084#ifdef DITHER1XBPP
88e2a9ae
CEH
1085 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1086 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1087 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
2da0d70d
DB
1088#endif
1089
14014d47
MN
1090 WRITERGB15(%4, %5, %%REGa)
1091 YSCALEYUV2PACKEDX_END
1092 return;
1093 case PIX_FMT_RGB565:
1094 YSCALEYUV2PACKEDX_ACCURATE
1095 YSCALEYUV2RGBX
40494418 1096 "pxor %%mm7, %%mm7 \n\t"
14014d47 1097 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bca11e75 1098#ifdef DITHER1XBPP
88e2a9ae
CEH
1099 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1100 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1101 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
2da0d70d
DB
1102#endif
1103
14014d47
MN
1104 WRITERGB16(%4, %5, %%REGa)
1105 YSCALEYUV2PACKEDX_END
1106 return;
1107 case PIX_FMT_YUYV422:
1108 YSCALEYUV2PACKEDX_ACCURATE
1109 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1110
1111 "psraw $3, %%mm3 \n\t"
1112 "psraw $3, %%mm4 \n\t"
1113 "psraw $3, %%mm1 \n\t"
1114 "psraw $3, %%mm7 \n\t"
1115 WRITEYUY2(%4, %5, %%REGa)
1116 YSCALEYUV2PACKEDX_END
1117 return;
1118 }
1119 }else{
1120 switch(c->dstFormat)
1121 {
1122 case PIX_FMT_RGB32:
6858492e
CS
1123 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1124 YSCALEYUV2PACKEDX
1125 YSCALEYUV2RGBX
1126 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
1127 "psraw $3, %%mm1 \n\t"
1128 "psraw $3, %%mm7 \n\t"
1129 "packuswb %%mm7, %%mm1 \n\t"
1130 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1131 YSCALEYUV2PACKEDX_END
1132 }else{
3164d25e
CS
1133 YSCALEYUV2PACKEDX
1134 YSCALEYUV2RGBX
1135 "pcmpeqd %%mm7, %%mm7 \n\t"
1136 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1137 YSCALEYUV2PACKEDX_END
6858492e 1138 }
14014d47
MN
1139 return;
1140 case PIX_FMT_BGR24:
1141 YSCALEYUV2PACKEDX
1142 YSCALEYUV2RGBX
40494418 1143 "pxor %%mm7, %%mm7 \n\t"
14014d47
MN
1144 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
1145 "add %4, %%"REG_c" \n\t"
1146 WRITEBGR24(%%REGc, %5, %%REGa)
1147
1148 :: "r" (&c->redDither),
1149 "m" (dummy), "m" (dummy), "m" (dummy),
1150 "r" (dest), "m" (dstW)
1151 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1152 );
1153 return;
1154 case PIX_FMT_RGB555:
1155 YSCALEYUV2PACKEDX
1156 YSCALEYUV2RGBX
40494418 1157 "pxor %%mm7, %%mm7 \n\t"
14014d47 1158 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
c1b0bfb4 1159#ifdef DITHER1XBPP
88e2a9ae
CEH
1160 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
1161 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
1162 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
2da0d70d
DB
1163#endif
1164
14014d47
MN
1165 WRITERGB15(%4, %5, %%REGa)
1166 YSCALEYUV2PACKEDX_END
1167 return;
1168 case PIX_FMT_RGB565:
1169 YSCALEYUV2PACKEDX
1170 YSCALEYUV2RGBX
40494418 1171 "pxor %%mm7, %%mm7 \n\t"
14014d47 1172 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
c1b0bfb4 1173#ifdef DITHER1XBPP
88e2a9ae
CEH
1174 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
1175 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
1176 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
2da0d70d
DB
1177#endif
1178
14014d47
MN
1179 WRITERGB16(%4, %5, %%REGa)
1180 YSCALEYUV2PACKEDX_END
1181 return;
1182 case PIX_FMT_YUYV422:
1183 YSCALEYUV2PACKEDX
1184 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1185
1186 "psraw $3, %%mm3 \n\t"
1187 "psraw $3, %%mm4 \n\t"
1188 "psraw $3, %%mm1 \n\t"
1189 "psraw $3, %%mm7 \n\t"
1190 WRITEYUY2(%4, %5, %%REGa)
1191 YSCALEYUV2PACKEDX_END
1192 return;
1193 }
bca11e75
MN
1194 }
1195 }
94daf2e9
RP
1196#endif /* COMPILE_TEMPLATE_MMX */
1197#if COMPILE_TEMPLATE_ALTIVEC
2da0d70d 1198 /* The following list of supported dstFormat values should
780daf2b 1199 match what's found in the body of ff_yuv2packedX_altivec() */
d55ef636 1200 if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
12794f73 1201 (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
2da0d70d 1202 c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
12794f73 1203 c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
780daf2b
DB
1204 ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
1205 chrFilter, chrSrc, chrFilterSize,
1206 dest, dstW, dstY);
2da0d70d
DB
1207 else
1208#endif
1209 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1210 chrFilter, chrSrc, chrFilterSize,
6858492e 1211 alpSrc, dest, dstW, dstY);
c1b0bfb4
MN
1212}
1213
/**
 * vertical bilinear scale YV12 to RGB
 */
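/* Blends two adjacent source lines: buf0/buf1 are weighted with yalpha and
 * uvbuf0/uvbuf1 with uvalpha (12-bit fractions, yalpha1 = 4095 - yalpha),
 * then the blend is converted to the packed destination format. */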
static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
{
ac0ad729
MN
1220 int yalpha1=4095- yalpha;
1221 int uvalpha1=4095-uvalpha;
2da0d70d 1222 int i;
d604bab9 1223
94daf2e9 1224#if COMPILE_TEMPLATE_MMX
f433c8ab 1225 if(!(c->flags & SWS_BITEXACT)){
2da0d70d
DB
1226 switch(c->dstFormat)
1227 {
1228 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1229 case PIX_FMT_RGB32:
6858492e
CS
1230 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1231#if ARCH_X86_64
1232 __asm__ volatile(
6858492e
CS
1233 YSCALEYUV2RGB(%%REGBP, %5)
1234 YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
1235 "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1236 "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1237 "packuswb %%mm7, %%mm1 \n\t"
04ef1d3f 1238 WRITEBGR32(%4, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
6858492e 1239
04ef1d3f 1240 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
6858492e
CS
1241 "a" (&c->redDither)
1242 ,"r" (abuf0), "r" (abuf1)
04ef1d3f 1243 : "%"REG_BP
6858492e
CS
1244 );
1245#else
1246 *(uint16_t **)(&c->u_temp)=abuf0;
1247 *(uint16_t **)(&c->v_temp)=abuf1;
1248 __asm__ volatile(
1249 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1250 "mov %4, %%"REG_b" \n\t"
1251 "push %%"REG_BP" \n\t"
1252 YSCALEYUV2RGB(%%REGBP, %5)
1253 "push %0 \n\t"
1254 "push %1 \n\t"
1255 "mov "U_TEMP"(%5), %0 \n\t"
1256 "mov "V_TEMP"(%5), %1 \n\t"
1257 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
1258 "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1259 "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1260 "packuswb %%mm7, %%mm1 \n\t"
1261 "pop %1 \n\t"
1262 "pop %0 \n\t"
1263 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1264 "pop %%"REG_BP" \n\t"
1265 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1266
1267 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1268 "a" (&c->redDither)
1269 );
1270#endif
1271 }else{
3164d25e
CS
1272 __asm__ volatile(
1273 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1274 "mov %4, %%"REG_b" \n\t"
1275 "push %%"REG_BP" \n\t"
1276 YSCALEYUV2RGB(%%REGBP, %5)
1277 "pcmpeqd %%mm7, %%mm7 \n\t"
1278 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1279 "pop %%"REG_BP" \n\t"
1280 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1281
3164d25e
CS
1282 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1283 "a" (&c->redDither)
1284 );
6858492e 1285 }
2da0d70d
DB
1286 return;
1287 case PIX_FMT_BGR24:
7ad6469e 1288 __asm__ volatile(
2da0d70d
DB
1289 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1290 "mov %4, %%"REG_b" \n\t"
1291 "push %%"REG_BP" \n\t"
1292 YSCALEYUV2RGB(%%REGBP, %5)
40494418 1293 "pxor %%mm7, %%mm7 \n\t"
2da0d70d
DB
1294 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1295 "pop %%"REG_BP" \n\t"
1296 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1297 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1298 "a" (&c->redDither)
1299 );
1300 return;
27a90b04 1301 case PIX_FMT_RGB555:
7ad6469e 1302 __asm__ volatile(
2da0d70d
DB
1303 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1304 "mov %4, %%"REG_b" \n\t"
1305 "push %%"REG_BP" \n\t"
1306 YSCALEYUV2RGB(%%REGBP, %5)
40494418 1307 "pxor %%mm7, %%mm7 \n\t"
2da0d70d 1308 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1309#ifdef DITHER1XBPP
88e2a9ae
CEH
1310 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1311 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1312 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1313#endif
1314
27a90b04 1315 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1316 "pop %%"REG_BP" \n\t"
1317 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1318
1319 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1320 "a" (&c->redDither)
1321 );
1322 return;
27a90b04 1323 case PIX_FMT_RGB565:
7ad6469e 1324 __asm__ volatile(
2da0d70d
DB
1325 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1326 "mov %4, %%"REG_b" \n\t"
1327 "push %%"REG_BP" \n\t"
1328 YSCALEYUV2RGB(%%REGBP, %5)
40494418 1329 "pxor %%mm7, %%mm7 \n\t"
2da0d70d 1330 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1331#ifdef DITHER1XBPP
88e2a9ae
CEH
1332 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1333 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1334 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1335#endif
1336
27a90b04 1337 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1338 "pop %%"REG_BP" \n\t"
1339 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1340 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1341 "a" (&c->redDither)
1342 );
1343 return;
1344 case PIX_FMT_YUYV422:
7ad6469e 1345 __asm__ volatile(
2da0d70d
DB
1346 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1347 "mov %4, %%"REG_b" \n\t"
1348 "push %%"REG_BP" \n\t"
1349 YSCALEYUV2PACKED(%%REGBP, %5)
1350 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1351 "pop %%"REG_BP" \n\t"
1352 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1353 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1354 "a" (&c->redDither)
1355 );
1356 return;
1357 default: break;
1358 }
f433c8ab 1359 }
94daf2e9 1360#endif //COMPILE_TEMPLATE_MMX
6858492e 1361YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
d604bab9
MN
1362}
1363
/**
 * YV12 to RGB without scaling or interpolating
 */
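/* Single source line: for uvalpha < 2048 the chroma of uvbuf0 is used as-is
 * (shifting chrominance by half a pixel, as the comment below notes), else
 * uvbuf0 and uvbuf1 are averaged; SWS_FULL_CHR_H_INT is delegated to
 * yuv2packed2(). */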
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
{
2da0d70d
DB
1370 const int yalpha1=0;
1371 int i;
6a4970ab 1372
7ac40327 1373 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
2da0d70d 1374 const int yalpha= 4096; //FIXME ...
96034638 1375
2da0d70d
DB
1376 if (flags&SWS_FULL_CHR_H_INT)
1377 {
40fa5140 1378 c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
2da0d70d
DB
1379 return;
1380 }
397c035e 1381
94daf2e9 1382#if COMPILE_TEMPLATE_MMX
f433c8ab 1383 if(!(flags & SWS_BITEXACT)){
14014d47 1384 if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
2da0d70d 1385 {
14014d47
MN
1386 switch(dstFormat)
1387 {
1388 case PIX_FMT_RGB32:
6858492e
CS
1389 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1390 __asm__ volatile(
1391 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1392 "mov %4, %%"REG_b" \n\t"
1393 "push %%"REG_BP" \n\t"
1394 YSCALEYUV2RGB1(%%REGBP, %5)
1395 YSCALEYUV2RGB1_ALPHA(%%REGBP)
1396 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1397 "pop %%"REG_BP" \n\t"
1398 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1399
1400 :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1401 "a" (&c->redDither)
1402 );
1403 }else{
3164d25e
CS
1404 __asm__ volatile(
1405 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1406 "mov %4, %%"REG_b" \n\t"
1407 "push %%"REG_BP" \n\t"
1408 YSCALEYUV2RGB1(%%REGBP, %5)
1409 "pcmpeqd %%mm7, %%mm7 \n\t"
1410 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1411 "pop %%"REG_BP" \n\t"
1412 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
14014d47 1413
3164d25e
CS
1414 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1415 "a" (&c->redDither)
1416 );
6858492e 1417 }
14014d47
MN
1418 return;
1419 case PIX_FMT_BGR24:
7ad6469e 1420 __asm__ volatile(
14014d47
MN
1421 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1422 "mov %4, %%"REG_b" \n\t"
1423 "push %%"REG_BP" \n\t"
1424 YSCALEYUV2RGB1(%%REGBP, %5)
40494418 1425 "pxor %%mm7, %%mm7 \n\t"
14014d47
MN
1426 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1427 "pop %%"REG_BP" \n\t"
1428 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1429
1430 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1431 "a" (&c->redDither)
1432 );
1433 return;
1434 case PIX_FMT_RGB555:
7ad6469e 1435 __asm__ volatile(
14014d47
MN
1436 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1437 "mov %4, %%"REG_b" \n\t"
1438 "push %%"REG_BP" \n\t"
1439 YSCALEYUV2RGB1(%%REGBP, %5)
40494418 1440 "pxor %%mm7, %%mm7 \n\t"
14014d47 1441 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1442#ifdef DITHER1XBPP
88e2a9ae
CEH
1443 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1444 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1445 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d 1446#endif
14014d47
MN
1447 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1448 "pop %%"REG_BP" \n\t"
1449 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1450
14014d47
MN
1451 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1452 "a" (&c->redDither)
1453 );
1454 return;
1455 case PIX_FMT_RGB565:
7ad6469e 1456 __asm__ volatile(
14014d47
MN
1457 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1458 "mov %4, %%"REG_b" \n\t"
1459 "push %%"REG_BP" \n\t"
1460 YSCALEYUV2RGB1(%%REGBP, %5)
40494418 1461 "pxor %%mm7, %%mm7 \n\t"
14014d47 1462 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1463#ifdef DITHER1XBPP
88e2a9ae
CEH
1464 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1465 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1466 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1467#endif
1468
14014d47
MN
1469 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1470 "pop %%"REG_BP" \n\t"
1471 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1472
14014d47
MN
1473 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1474 "a" (&c->redDither)
1475 );
1476 return;
1477 case PIX_FMT_YUYV422:
7ad6469e 1478 __asm__ volatile(
14014d47
MN
1479 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1480 "mov %4, %%"REG_b" \n\t"
1481 "push %%"REG_BP" \n\t"
1482 YSCALEYUV2PACKED1(%%REGBP, %5)
1483 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1484 "pop %%"REG_BP" \n\t"
1485 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1486
1487 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1488 "a" (&c->redDither)
1489 );
1490 return;
1491 }
2da0d70d 1492 }
14014d47 1493 else
2da0d70d 1494 {
14014d47
MN
1495 switch(dstFormat)
1496 {
1497 case PIX_FMT_RGB32:
6858492e
CS
1498 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1499 __asm__ volatile(
1500 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1501 "mov %4, %%"REG_b" \n\t"
1502 "push %%"REG_BP" \n\t"
1503 YSCALEYUV2RGB1b(%%REGBP, %5)
1504 YSCALEYUV2RGB1_ALPHA(%%REGBP)
1505 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1506 "pop %%"REG_BP" \n\t"
1507 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1508
1509 :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1510 "a" (&c->redDither)
1511 );
1512 }else{
3164d25e
CS
1513 __asm__ volatile(
1514 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1515 "mov %4, %%"REG_b" \n\t"
1516 "push %%"REG_BP" \n\t"
1517 YSCALEYUV2RGB1b(%%REGBP, %5)
1518 "pcmpeqd %%mm7, %%mm7 \n\t"
1519 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1520 "pop %%"REG_BP" \n\t"
1521 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
14014d47 1522
3164d25e
CS
1523 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1524 "a" (&c->redDither)
1525 );
6858492e 1526 }
14014d47
MN
1527 return;
1528 case PIX_FMT_BGR24:
7ad6469e 1529 __asm__ volatile(
14014d47
MN
1530 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1531 "mov %4, %%"REG_b" \n\t"
1532 "push %%"REG_BP" \n\t"
1533 YSCALEYUV2RGB1b(%%REGBP, %5)
40494418 1534 "pxor %%mm7, %%mm7 \n\t"
14014d47
MN
1535 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1536 "pop %%"REG_BP" \n\t"
1537 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1538
1539 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1540 "a" (&c->redDither)
1541 );
1542 return;
1543 case PIX_FMT_RGB555:
7ad6469e 1544 __asm__ volatile(
14014d47
MN
1545 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1546 "mov %4, %%"REG_b" \n\t"
1547 "push %%"REG_BP" \n\t"
1548 YSCALEYUV2RGB1b(%%REGBP, %5)
40494418 1549 "pxor %%mm7, %%mm7 \n\t"
14014d47 1550 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
497d4f99 1551#ifdef DITHER1XBPP
88e2a9ae
CEH
1552 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1553 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1554 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d 1555#endif
14014d47
MN
1556 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1557 "pop %%"REG_BP" \n\t"
1558 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1559
14014d47
MN
1560 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1561 "a" (&c->redDither)
1562 );
1563 return;
1564 case PIX_FMT_RGB565:
7ad6469e 1565 __asm__ volatile(
14014d47
MN
1566 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1567 "mov %4, %%"REG_b" \n\t"
1568 "push %%"REG_BP" \n\t"
1569 YSCALEYUV2RGB1b(%%REGBP, %5)
40494418 1570 "pxor %%mm7, %%mm7 \n\t"
14014d47 1571 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
497d4f99 1572#ifdef DITHER1XBPP
88e2a9ae
CEH
1573 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1574 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1575 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1576#endif
1577
14014d47
MN
1578 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1579 "pop %%"REG_BP" \n\t"
1580 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1581
14014d47
MN
1582 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1583 "a" (&c->redDither)
1584 );
1585 return;
1586 case PIX_FMT_YUYV422:
7ad6469e 1587 __asm__ volatile(
14014d47
MN
1588 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1589 "mov %4, %%"REG_b" \n\t"
1590 "push %%"REG_BP" \n\t"
1591 YSCALEYUV2PACKED1b(%%REGBP, %5)
1592 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1593 "pop %%"REG_BP" \n\t"
1594 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1595
1596 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1597 "a" (&c->redDither)
1598 );
1599 return;
1600 }
2da0d70d
DB
1601 }
1602 }
94daf2e9 1603#endif /* COMPILE_TEMPLATE_MMX */
e5091488 1604 if (uvalpha < 2048)
2da0d70d 1605 {
6858492e 1606 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
2da0d70d 1607 }else{
6858492e 1608 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
2da0d70d 1609 }
d604bab9
MN
1610}
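/* Illustrative sketch (not part of the build) of the chroma handling in the
 * unscaled path above: uvalpha appears to be a 0..4096 vertical blend weight
 * between the two chroma input lines.  The "...1" variants use a single
 * chroma line as-is (the 0.5-pixel shift noted in the comment above), while
 * the "...1b" variants average both lines.  Names below are hypothetical. */
#if 0
static int unscaled_chroma_sketch(int uv_line_a, int uv_line_b, int uvalpha)
{
    if (uvalpha < 2048)                  /* nearest-line path: cheaper, slightly shifted */
        return uv_line_a;
    return (uv_line_a + uv_line_b) >> 1; /* halfway case: average the two lines */
}
#endif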
1611
8a322796 1612//FIXME yuy2* can read up to 7 samples too many
6ff0ad6b 1613
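/* For reference, the packed 4:2:2 byte order the readers below assume
 * (one two-pixel group) -- a property of the YUYV/UYVY formats themselves:
 *     YUYV422:  Y0 U  Y1 V   -> luma at even offsets, chroma at odd ones
 *     UYVY422:  U  Y0 V  Y1  -> luma at odd offsets,  chroma at even ones
 * Minimal scalar sketch of the extraction (width here is the luma width;
 * the real functions below are called with the chroma width for UV): */
#if 0
static void yuyv_unpack_sketch(uint8_t *dstY, uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src, long width)
{
    long i;
    for (i = 0; i < width; i++)
        dstY[i] = src[2*i];         /* every pixel carries a luma sample */
    for (i = 0; i < width/2; i++) {
        dstU[i] = src[4*i + 1];     /* one U per two-pixel group */
        dstV[i] = src[4*i + 3];     /* one V per two-pixel group */
    }
}
#endif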
7ac40327 1614static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1e621b18 1615{
94daf2e9 1616#if COMPILE_TEMPLATE_MMX
7ad6469e 1617 __asm__ volatile(
2da0d70d
DB
1618 "movq "MANGLE(bm01010101)", %%mm2 \n\t"
1619 "mov %0, %%"REG_a" \n\t"
1620 "1: \n\t"
1621 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1622 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1623 "pand %%mm2, %%mm0 \n\t"
1624 "pand %%mm2, %%mm1 \n\t"
1625 "packuswb %%mm1, %%mm0 \n\t"
1626 "movq %%mm0, (%2, %%"REG_a") \n\t"
1627 "add $8, %%"REG_a" \n\t"
1628 " js 1b \n\t"
d0ce212a 1629 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
2da0d70d
DB
1630 : "%"REG_a
1631 );
1e621b18 1632#else
2da0d70d
DB
1633 int i;
1634 for (i=0; i<width; i++)
1635 dst[i]= src[2*i];
1e621b18
MN
1636#endif
1637}
1638
7ac40327 1639static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1e621b18 1640{
94daf2e9 1641#if COMPILE_TEMPLATE_MMX
7ad6469e 1642 __asm__ volatile(
2da0d70d
DB
1643 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1644 "mov %0, %%"REG_a" \n\t"
1645 "1: \n\t"
1646 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
1647 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
1648 "psrlw $8, %%mm0 \n\t"
1649 "psrlw $8, %%mm1 \n\t"
1650 "packuswb %%mm1, %%mm0 \n\t"
1651 "movq %%mm0, %%mm1 \n\t"
1652 "psrlw $8, %%mm0 \n\t"
1653 "pand %%mm4, %%mm1 \n\t"
1654 "packuswb %%mm0, %%mm0 \n\t"
1655 "packuswb %%mm1, %%mm1 \n\t"
1656 "movd %%mm0, (%3, %%"REG_a") \n\t"
1657 "movd %%mm1, (%2, %%"REG_a") \n\t"
1658 "add $4, %%"REG_a" \n\t"
1659 " js 1b \n\t"
d0ce212a 1660 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
2da0d70d
DB
1661 : "%"REG_a
1662 );
1e621b18 1663#else
2da0d70d
DB
1664 int i;
1665 for (i=0; i<width; i++)
1666 {
1667 dstU[i]= src1[4*i + 1];
1668 dstV[i]= src1[4*i + 3];
1669 }
1670#endif
1671 assert(src1 == src2);
1e621b18
MN
1672}
1673
de1275d5
MN
1674static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1675{
94daf2e9 1676#if COMPILE_TEMPLATE_MMX
de1275d5
MN
1677 __asm__ volatile(
1678 "mov %0, %%"REG_a" \n\t"
1679 "1: \n\t"
1680 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1681 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1682 "movq (%2, %%"REG_a",2), %%mm2 \n\t"
1683 "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
1684 "psrlw $8, %%mm0 \n\t"
1685 "psrlw $8, %%mm1 \n\t"
1686 "psrlw $8, %%mm2 \n\t"
1687 "psrlw $8, %%mm3 \n\t"
1688 "packuswb %%mm1, %%mm0 \n\t"
1689 "packuswb %%mm3, %%mm2 \n\t"
1690 "movq %%mm0, (%3, %%"REG_a") \n\t"
1691 "movq %%mm2, (%4, %%"REG_a") \n\t"
1692 "add $8, %%"REG_a" \n\t"
1693 " js 1b \n\t"
1694 : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1695 : "%"REG_a
1696 );
1697#else
1698 int i;
1699 for (i=0; i<width; i++)
1700 {
1701 dstU[i]= src1[2*i + 1];
1702 dstV[i]= src2[2*i + 1];
1703 }
1704#endif
1705}
1706
4cf16bbe
DB
 1707/* This is almost identical to the previous, and exists only because
 1708 * yuy2To(Y|UV)(dst, src+1, ...) would have 100% unaligned accesses. */
7ac40327 1709static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
7322a67c 1710{
94daf2e9 1711#if COMPILE_TEMPLATE_MMX
7ad6469e 1712 __asm__ volatile(
2da0d70d
DB
1713 "mov %0, %%"REG_a" \n\t"
1714 "1: \n\t"
1715 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1716 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1717 "psrlw $8, %%mm0 \n\t"
1718 "psrlw $8, %%mm1 \n\t"
1719 "packuswb %%mm1, %%mm0 \n\t"
1720 "movq %%mm0, (%2, %%"REG_a") \n\t"
1721 "add $8, %%"REG_a" \n\t"
1722 " js 1b \n\t"
d0ce212a 1723 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
2da0d70d
DB
1724 : "%"REG_a
1725 );
7322a67c 1726#else
2da0d70d
DB
1727 int i;
1728 for (i=0; i<width; i++)
1729 dst[i]= src[2*i+1];
7322a67c
MN
1730#endif
1731}
1732
7ac40327 1733static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
7322a67c 1734{
94daf2e9 1735#if COMPILE_TEMPLATE_MMX
7ad6469e 1736 __asm__ volatile(
2da0d70d
DB
1737 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1738 "mov %0, %%"REG_a" \n\t"
1739 "1: \n\t"
1740 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
1741 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
1742 "pand %%mm4, %%mm0 \n\t"
1743 "pand %%mm4, %%mm1 \n\t"
1744 "packuswb %%mm1, %%mm0 \n\t"
1745 "movq %%mm0, %%mm1 \n\t"
1746 "psrlw $8, %%mm0 \n\t"
1747 "pand %%mm4, %%mm1 \n\t"
1748 "packuswb %%mm0, %%mm0 \n\t"
1749 "packuswb %%mm1, %%mm1 \n\t"
1750 "movd %%mm0, (%3, %%"REG_a") \n\t"
1751 "movd %%mm1, (%2, %%"REG_a") \n\t"
1752 "add $4, %%"REG_a" \n\t"
1753 " js 1b \n\t"
d0ce212a 1754 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
2da0d70d
DB
1755 : "%"REG_a
1756 );
7322a67c 1757#else
2da0d70d
DB
1758 int i;
1759 for (i=0; i<width; i++)
1760 {
1761 dstU[i]= src1[4*i + 0];
1762 dstV[i]= src1[4*i + 2];
1763 }
1764#endif
1765 assert(src1 == src2);
7322a67c
MN
1766}
1767
de1275d5
MN
1768static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1769{
94daf2e9 1770#if COMPILE_TEMPLATE_MMX
de1275d5
MN
1771 __asm__ volatile(
1772 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1773 "mov %0, %%"REG_a" \n\t"
1774 "1: \n\t"
1775 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1776 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1777 "movq (%2, %%"REG_a",2), %%mm2 \n\t"
1778 "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
1779 "pand %%mm4, %%mm0 \n\t"
1780 "pand %%mm4, %%mm1 \n\t"
1781 "pand %%mm4, %%mm2 \n\t"
1782 "pand %%mm4, %%mm3 \n\t"
1783 "packuswb %%mm1, %%mm0 \n\t"
1784 "packuswb %%mm3, %%mm2 \n\t"
1785 "movq %%mm0, (%3, %%"REG_a") \n\t"
1786 "movq %%mm2, (%4, %%"REG_a") \n\t"
1787 "add $8, %%"REG_a" \n\t"
1788 " js 1b \n\t"
1789 : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1790 : "%"REG_a
1791 );
1792#else
1793 int i;
1794 for (i=0; i<width; i++)
1795 {
1796 dstU[i]= src1[2*i];
1797 dstV[i]= src2[2*i];
1798 }
1799#endif
1800}
1801
94daf2e9 1802#if COMPILE_TEMPLATE_MMX
7ac40327 1803static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat)
dfb09bd1
MN
1804{
1805
1806 if(srcFormat == PIX_FMT_BGR24){
7ad6469e 1807 __asm__ volatile(
ff9a056d
MN
1808 "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
1809 "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
1810 :
dfb09bd1
MN
1811 );
1812 }else{
7ad6469e 1813 __asm__ volatile(
ff9a056d
MN
1814 "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
1815 "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
1816 :
dfb09bd1
MN
1817 );
1818 }
1819
7ad6469e 1820 __asm__ volatile(
dfb09bd1
MN
1821 "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"
1822 "mov %2, %%"REG_a" \n\t"
1823 "pxor %%mm7, %%mm7 \n\t"
1824 "1: \n\t"
1825 PREFETCH" 64(%0) \n\t"
1826 "movd (%0), %%mm0 \n\t"
1827 "movd 2(%0), %%mm1 \n\t"
1828 "movd 6(%0), %%mm2 \n\t"
1829 "movd 8(%0), %%mm3 \n\t"
1830 "add $12, %0 \n\t"
1831 "punpcklbw %%mm7, %%mm0 \n\t"
1832 "punpcklbw %%mm7, %%mm1 \n\t"
1833 "punpcklbw %%mm7, %%mm2 \n\t"
1834 "punpcklbw %%mm7, %%mm3 \n\t"
1835 "pmaddwd %%mm5, %%mm0 \n\t"
1836 "pmaddwd %%mm6, %%mm1 \n\t"
1837 "pmaddwd %%mm5, %%mm2 \n\t"
1838 "pmaddwd %%mm6, %%mm3 \n\t"
1839 "paddd %%mm1, %%mm0 \n\t"
1840 "paddd %%mm3, %%mm2 \n\t"
1841 "paddd %%mm4, %%mm0 \n\t"
1842 "paddd %%mm4, %%mm2 \n\t"
1843 "psrad $15, %%mm0 \n\t"
1844 "psrad $15, %%mm2 \n\t"
1845 "packssdw %%mm2, %%mm0 \n\t"
1846 "packuswb %%mm0, %%mm0 \n\t"
1847 "movd %%mm0, (%1, %%"REG_a") \n\t"
1848 "add $4, %%"REG_a" \n\t"
1849 " js 1b \n\t"
1850 : "+r" (src)
d0ce212a 1851 : "r" (dst+width), "g" ((x86_reg)-width)
dfb09bd1 1852 : "%"REG_a
2da0d70d 1853 );
dfb09bd1
MN
1854}
1855
7ac40327 1856static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, int srcFormat)
dfb09bd1 1857{
7ad6469e 1858 __asm__ volatile(
dfb09bd1
MN
1859 "movq 24+%4, %%mm6 \n\t"
1860 "mov %3, %%"REG_a" \n\t"
1861 "pxor %%mm7, %%mm7 \n\t"
1862 "1: \n\t"
1863 PREFETCH" 64(%0) \n\t"
1864 "movd (%0), %%mm0 \n\t"
1865 "movd 2(%0), %%mm1 \n\t"
1866 "punpcklbw %%mm7, %%mm0 \n\t"
1867 "punpcklbw %%mm7, %%mm1 \n\t"
1868 "movq %%mm0, %%mm2 \n\t"
1869 "movq %%mm1, %%mm3 \n\t"
1870 "pmaddwd %4, %%mm0 \n\t"
1871 "pmaddwd 8+%4, %%mm1 \n\t"
1872 "pmaddwd 16+%4, %%mm2 \n\t"
1873 "pmaddwd %%mm6, %%mm3 \n\t"
1874 "paddd %%mm1, %%mm0 \n\t"
1875 "paddd %%mm3, %%mm2 \n\t"
1876
1877 "movd 6(%0), %%mm1 \n\t"
1878 "movd 8(%0), %%mm3 \n\t"
1879 "add $12, %0 \n\t"
1880 "punpcklbw %%mm7, %%mm1 \n\t"
1881 "punpcklbw %%mm7, %%mm3 \n\t"
1882 "movq %%mm1, %%mm4 \n\t"
1883 "movq %%mm3, %%mm5 \n\t"
1884 "pmaddwd %4, %%mm1 \n\t"
1885 "pmaddwd 8+%4, %%mm3 \n\t"
1886 "pmaddwd 16+%4, %%mm4 \n\t"
1887 "pmaddwd %%mm6, %%mm5 \n\t"
1888 "paddd %%mm3, %%mm1 \n\t"
1889 "paddd %%mm5, %%mm4 \n\t"
1890
1891 "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
1892 "paddd %%mm3, %%mm0 \n\t"
1893 "paddd %%mm3, %%mm2 \n\t"
1894 "paddd %%mm3, %%mm1 \n\t"
1895 "paddd %%mm3, %%mm4 \n\t"
1896 "psrad $15, %%mm0 \n\t"
1897 "psrad $15, %%mm2 \n\t"
1898 "psrad $15, %%mm1 \n\t"
1899 "psrad $15, %%mm4 \n\t"
1900 "packssdw %%mm1, %%mm0 \n\t"
1901 "packssdw %%mm4, %%mm2 \n\t"
1902 "packuswb %%mm0, %%mm0 \n\t"
1903 "packuswb %%mm2, %%mm2 \n\t"
1904 "movd %%mm0, (%1, %%"REG_a") \n\t"
1905 "movd %%mm2, (%2, %%"REG_a") \n\t"
1906 "add $4, %%"REG_a" \n\t"
1907 " js 1b \n\t"
1908 : "+r" (src)
d0ce212a 1909 : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
dfb09bd1
MN
1910 : "%"REG_a
1911 );
1912}
1913#endif
1914
7ac40327 1915static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
dfb09bd1 1916{
94daf2e9 1917#if COMPILE_TEMPLATE_MMX
a35acd7f 1918 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
1e621b18 1919#else
2da0d70d
DB
1920 int i;
1921 for (i=0; i<width; i++)
1922 {
1923 int b= src[i*3+0];
1924 int g= src[i*3+1];
1925 int r= src[i*3+2];
1e621b18 1926
e5091488 1927 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 1928 }
94daf2e9 1929#endif /* COMPILE_TEMPLATE_MMX */
1e621b18
MN
1930}
1931
7ac40327 1932static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1e621b18 1933{
94daf2e9 1934#if COMPILE_TEMPLATE_MMX
a35acd7f 1935 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
1e621b18 1936#else
2da0d70d
DB
1937 int i;
1938 for (i=0; i<width; i++)
1939 {
dfb09bd1
MN
1940 int b= src1[3*i + 0];
1941 int g= src1[3*i + 1];
1942 int r= src1[3*i + 2];
2da0d70d 1943
dfb09bd1
MN
1944 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1945 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2da0d70d 1946 }
94daf2e9 1947#endif /* COMPILE_TEMPLATE_MMX */
2da0d70d 1948 assert(src1 == src2);
1e621b18
MN
1949}
1950
7ac40327 1951static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2f60f629
MN
1952{
1953 int i;
1954 for (i=0; i<width; i++)
1955 {
1956 int b= src1[6*i + 0] + src1[6*i + 3];
1957 int g= src1[6*i + 1] + src1[6*i + 4];
1958 int r= src1[6*i + 2] + src1[6*i + 5];
1959
1960 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1961 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1962 }
1963 assert(src1 == src2);
1964}
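/* Worked arithmetic for the rounding constants used in the C RGB->YUV paths
 * above and below (an observation about the existing code, not new behaviour):
 *     33<<(RGB2YUV_SHIFT-1)  == (16<<RGB2YUV_SHIFT)  + (1<<(RGB2YUV_SHIFT-1))
 *     257<<(RGB2YUV_SHIFT-1) == (128<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-1))
 * i.e. presumably the +16 luma / +128 chroma offset plus round-to-nearest.
 * In the *_half variants r, g and b already hold the sum of two pixels, so
 * the offset term is doubled (257<<RGB2YUV_SHIFT) and the final shift is one
 * bit larger. */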
1965
7ac40327 1966static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
a861d4d7 1967{
94daf2e9 1968#if COMPILE_TEMPLATE_MMX
a35acd7f 1969 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
dfb09bd1 1970#else
2da0d70d
DB
1971 int i;
1972 for (i=0; i<width; i++)
1973 {
1974 int r= src[i*3+0];
1975 int g= src[i*3+1];
1976 int b= src[i*3+2];
1977
e5091488 1978 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 1979 }
dfb09bd1 1980#endif
a861d4d7
MN
1981}
1982
7ac40327 1983static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
a861d4d7 1984{
94daf2e9 1985#if COMPILE_TEMPLATE_MMX
5155b839 1986 assert(src1==src2);
a35acd7f 1987 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
dfb09bd1 1988#else
5155b839
DB
1989 int i;
1990 assert(src1==src2);
2da0d70d
DB
1991 for (i=0; i<width; i++)
1992 {
dfb09bd1
MN
1993 int r= src1[3*i + 0];
1994 int g= src1[3*i + 1];
1995 int b= src1[3*i + 2];
2da0d70d 1996
dfb09bd1
MN
1997 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1998 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2da0d70d 1999 }
dfb09bd1 2000#endif
a861d4d7
MN
2001}
2002
7ac40327 2003static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2f60f629
MN
2004{
2005 int i;
2006 assert(src1==src2);
2007 for (i=0; i<width; i++)
2008 {
e09d7eef
MN
2009 int r= src1[6*i + 0] + src1[6*i + 3];
2010 int g= src1[6*i + 1] + src1[6*i + 4];
2011 int b= src1[6*i + 2] + src1[6*i + 5];
2f60f629
MN
2012
2013 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
2014 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
2015 }
2016}
2017
1e621b18 2018
8a322796 2019// bilinear / bicubic scaling
7ac40327
RP
2020static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
2021 const int16_t *filter, const int16_t *filterPos, long filterSize)
2ff198c1 2022{
94daf2e9 2023#if COMPILE_TEMPLATE_MMX
2da0d70d
DB
2024 assert(filterSize % 4 == 0 && filterSize>0);
2025 if (filterSize==4) // Always true for upscaling, sometimes for down, too.
2026 {
d0ce212a 2027 x86_reg counter= -2*dstW;
2da0d70d
DB
2028 filter-= counter*2;
2029 filterPos-= counter/2;
2030 dst-= counter/2;
7ad6469e 2031 __asm__ volatile(
83c89c78 2032#if defined(PIC)
2da0d70d
DB
2033 "push %%"REG_b" \n\t"
2034#endif
2035 "pxor %%mm7, %%mm7 \n\t"
2da0d70d
DB
2036 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2037 "mov %%"REG_a", %%"REG_BP" \n\t"
2038 ASMALIGN(4)
2039 "1: \n\t"
2040 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2041 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
2042 "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
2043 "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
2044 "movd (%3, %%"REG_a"), %%mm0 \n\t"
2045 "movd (%3, %%"REG_b"), %%mm2 \n\t"
2046 "punpcklbw %%mm7, %%mm0 \n\t"
2047 "punpcklbw %%mm7, %%mm2 \n\t"
2048 "pmaddwd %%mm1, %%mm0 \n\t"
2049 "pmaddwd %%mm2, %%mm3 \n\t"
ef423a66
MN
2050 "movq %%mm0, %%mm4 \n\t"
2051 "punpckldq %%mm3, %%mm0 \n\t"
2052 "punpckhdq %%mm3, %%mm4 \n\t"
2053 "paddd %%mm4, %%mm0 \n\t"
2054 "psrad $7, %%mm0 \n\t"
2da0d70d
DB
2055 "packssdw %%mm0, %%mm0 \n\t"
2056 "movd %%mm0, (%4, %%"REG_BP") \n\t"
2057 "add $4, %%"REG_BP" \n\t"
2058 " jnc 1b \n\t"
2059
2060 "pop %%"REG_BP" \n\t"
83c89c78 2061#if defined(PIC)
2da0d70d 2062 "pop %%"REG_b" \n\t"
83c89c78 2063#endif
2da0d70d
DB
2064 : "+a" (counter)
2065 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
83c89c78 2066#if !defined(PIC)
2da0d70d
DB
2067 : "%"REG_b
2068#endif
2069 );
2070 }
2071 else if (filterSize==8)
2072 {
d0ce212a 2073 x86_reg counter= -2*dstW;
2da0d70d
DB
2074 filter-= counter*4;
2075 filterPos-= counter/2;
2076 dst-= counter/2;
7ad6469e 2077 __asm__ volatile(
83c89c78 2078#if defined(PIC)
2da0d70d
DB
2079 "push %%"REG_b" \n\t"
2080#endif
2081 "pxor %%mm7, %%mm7 \n\t"
2da0d70d
DB
2082 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2083 "mov %%"REG_a", %%"REG_BP" \n\t"
2084 ASMALIGN(4)
2085 "1: \n\t"
2086 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2087 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
2088 "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
2089 "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
2090 "movd (%3, %%"REG_a"), %%mm0 \n\t"
2091 "movd (%3, %%"REG_b"), %%mm2 \n\t"
2092 "punpcklbw %%mm7, %%mm0 \n\t"
2093 "punpcklbw %%mm7, %%mm2 \n\t"
2094 "pmaddwd %%mm1, %%mm0 \n\t"
2095 "pmaddwd %%mm2, %%mm3 \n\t"
2096
2097 "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
2098 "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
2099 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
2100 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
2101 "punpcklbw %%mm7, %%mm4 \n\t"
2102 "punpcklbw %%mm7, %%mm2 \n\t"
2103 "pmaddwd %%mm1, %%mm4 \n\t"
2104 "pmaddwd %%mm2, %%mm5 \n\t"
2105 "paddd %%mm4, %%mm0 \n\t"
2106 "paddd %%mm5, %%mm3 \n\t"
ef423a66
MN
2107 "movq %%mm0, %%mm4 \n\t"
2108 "punpckldq %%mm3, %%mm0 \n\t"
2109 "punpckhdq %%mm3, %%mm4 \n\t"
2110 "paddd %%mm4, %%mm0 \n\t"
2111 "psrad $7, %%mm0 \n\t"
2da0d70d
DB
2112 "packssdw %%mm0, %%mm0 \n\t"
2113 "movd %%mm0, (%4, %%"REG_BP") \n\t"
2114 "add $4, %%"REG_BP" \n\t"
2115 " jnc 1b \n\t"
2116
2117 "pop %%"REG_BP" \n\t"
83c89c78 2118#if defined(PIC)
2da0d70d 2119 "pop %%"REG_b" \n\t"
83c89c78 2120#endif
2da0d70d
DB
2121 : "+a" (counter)
2122 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
83c89c78 2123#if !defined(PIC)
2da0d70d
DB
2124 : "%"REG_b
2125#endif
2126 );
2127 }
2128 else
2129 {
 2130 const uint8_t *offset = src+filterSize;
d0ce212a 2131 x86_reg counter= -2*dstW;
2da0d70d
DB
2132 //filter-= counter*filterSize/2;
2133 filterPos-= counter/2;
2134 dst-= counter/2;
7ad6469e 2135 __asm__ volatile(
2da0d70d 2136 "pxor %%mm7, %%mm7 \n\t"
2da0d70d
DB
2137 ASMALIGN(4)
2138 "1: \n\t"
2139 "mov %2, %%"REG_c" \n\t"
2140 "movzwl (%%"REG_c", %0), %%eax \n\t"
2141 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
2142 "mov %5, %%"REG_c" \n\t"
2143 "pxor %%mm4, %%mm4 \n\t"
2144 "pxor %%mm5, %%mm5 \n\t"
2145 "2: \n\t"
2146 "movq (%1), %%mm1 \n\t"
2147 "movq (%1, %6), %%mm3 \n\t"
2148 "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
2149 "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
2150 "punpcklbw %%mm7, %%mm0 \n\t"
2151 "punpcklbw %%mm7, %%mm2 \n\t"
2152 "pmaddwd %%mm1, %%mm0 \n\t"
2153 "pmaddwd %%mm2, %%mm3 \n\t"
2154 "paddd %%mm3, %%mm5 \n\t"
2155 "paddd %%mm0, %%mm4 \n\t"
2156 "add $8, %1 \n\t"
2157 "add $4, %%"REG_c" \n\t"
2158 "cmp %4, %%"REG_c" \n\t"
2159 " jb 2b \n\t"
2160 "add %6, %1 \n\t"
ef423a66
MN
2161 "movq %%mm4, %%mm0 \n\t"
2162 "punpckldq %%mm5, %%mm4 \n\t"
2163 "punpckhdq %%mm5, %%mm0 \n\t"
2164 "paddd %%mm0, %%mm4 \n\t"
2165 "psrad $7, %%mm4 \n\t"
2da0d70d
DB
2166 "packssdw %%mm4, %%mm4 \n\t"
2167 "mov %3, %%"REG_a" \n\t"
2168 "movd %%mm4, (%%"REG_a", %0) \n\t"
2169 "add $4, %0 \n\t"
2170 " jnc 1b \n\t"
2171
2172 : "+r" (counter), "+r" (filter)
2173 : "m" (filterPos), "m" (dst), "m"(offset),
d0ce212a 2174 "m" (src), "r" ((x86_reg)filterSize*2)
2da0d70d
DB
2175 : "%"REG_a, "%"REG_c, "%"REG_d
2176 );
2177 }
077ea8a7 2178#else
94daf2e9 2179#if COMPILE_TEMPLATE_ALTIVEC
2da0d70d 2180 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
8c266f0c 2181#else
2da0d70d
DB
2182 int i;
2183 for (i=0; i<dstW; i++)
2184 {
2185 int j;
2186 int srcPos= filterPos[i];
2187 int val=0;
2188 //printf("filterPos: %d\n", filterPos[i]);
2189 for (j=0; j<filterSize; j++)
2190 {
2191 //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
2192 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2193 }
2194 //filter += hFilterSize;
881c4294 2195 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
2da0d70d
DB
2196 //dst[i] = val>>7;
2197 }
94daf2e9
RP
2198#endif /* COMPILE_ALTIVEC */
2199#endif /* COMPILE_MMX */
077ea8a7 2200}
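/* Sketch of the fixed-point convention hScale() appears to use (an assumption
 * read off the code, not a documented contract): src is 8-bit, the int16_t
 * filter taps seem to sum to roughly 1<<14, and the >>7 leaves the output in
 * the ~15-bit intermediate format used elsewhere (a plain copy shows up as
 * src<<7).  For an identity 4-tap filter {0, 16384, 0, 0} and src value 200:
 *     (200*16384)>>7 == 25600 == 200<<7 */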
392b6567 2201
18c61752
RP
2202#define FAST_BILINEAR_X86 \
2203 "subl %%edi, %%esi \n\t" /* src[xx+1] - src[xx] */ \
2204 "imull %%ecx, %%esi \n\t" /* (src[xx+1] - src[xx])*xalpha */ \
2205 "shll $16, %%edi \n\t" \
2206 "addl %%edi, %%esi \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */ \
2207 "mov %1, %%"REG_D"\n\t" \
2208 "shrl $9, %%esi \n\t" \
2209
392b6567 2210static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
7ac40327 2211 int dstWidth, const uint8_t *src, int srcW,
392b6567
RP
2212 int xInc)
2213{
2214 int i;
2215 unsigned int xpos=0;
2216 for (i=0;i<dstWidth;i++)
2217 {
2218 register unsigned int xx=xpos>>16;
2219 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2220 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2221 xpos+=xInc;
2222 }
2223}
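/* Worked example of the 16.16 fixed-point walk above, assuming
 * xInc ~= (srcW<<16)/dstW (set up elsewhere in the context): doubling the
 * width gives xInc == 0x8000, so for i == 1, xpos == 0x8000, xx == 0 and
 * xalpha == (0x8000 & 0xFFFF)>>9 == 64, hence
 *     dst[1] == src[0]*128 + (src[1]-src[0])*64
 * i.e. the halfway blend of the two neighbours, kept at 7 extra bits of
 * precision like the rest of the horizontal scaler output. */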
2224
2ff198c1 2225 // *** horizontal scale Y line to temp buffer
7ac40327
RP
2226static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
2227 int flags, const int16_t *hLumFilter,
2228 const int16_t *hLumFilterPos, int hLumFilterSize,
95b5770b
RP
2229 int srcFormat, uint8_t *formatConvBuffer,
2230 uint32_t *pal, int isAlpha)
077ea8a7 2231{
fdf70cc5
RP
2232 int32_t av_unused *mmx2FilterPos = c->lumMmx2FilterPos;
2233 int16_t av_unused *mmx2Filter = c->lumMmx2Filter;
2234 int av_unused canMMX2BeUsed = c->canMMX2BeUsed;
bcdedf67 2235 void av_unused *mmx2FilterCode= c->lumMmx2FilterCode;
39e5f87b 2236 void (*internal_func)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->hascale_internal : c->hyscale_internal;
95b5770b 2237
40fa5140
RP
2238 if (isAlpha) {
2239 if (srcFormat == PIX_FMT_RGB32 || srcFormat == PIX_FMT_BGR32 )
2240 src += 3;
2241 } else {
2242 if (srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
2243 src += ALT32_CORR;
9990e426 2244 }
40fa5140 2245
e8417235
KS
2246 if (srcFormat == PIX_FMT_RGB48LE)
2247 src++;
2248
39e5f87b
CS
2249 if (internal_func) {
2250 internal_func(formatConvBuffer, src, srcW, pal);
3d05e078
MN
2251 src= formatConvBuffer;
2252 }
1e621b18 2253
94daf2e9 2254#if COMPILE_TEMPLATE_MMX
8a322796 2255 // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
2da0d70d 2256 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
e3d2500f 2257#else
2da0d70d 2258 if (!(flags&SWS_FAST_BILINEAR))
e3d2500f 2259#endif
077ea8a7 2260 {
40fa5140 2261 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
077ea8a7 2262 }
8a322796 2263 else // fast bilinear upscale / crap downscale
077ea8a7 2264 {
57f9a560 2265#if ARCH_X86 && CONFIG_GPL
94daf2e9 2266#if COMPILE_TEMPLATE_MMX2
2da0d70d 2267 int i;
83c89c78 2268#if defined(PIC)
934626a9 2269 DECLARE_ALIGNED(8, uint64_t, ebxsave);
83c89c78 2270#endif
2da0d70d
DB
2271 if (canMMX2BeUsed)
2272 {
7ad6469e 2273 __asm__ volatile(
83c89c78 2274#if defined(PIC)
2da0d70d
DB
2275 "mov %%"REG_b", %5 \n\t"
2276#endif
2277 "pxor %%mm7, %%mm7 \n\t"
2278 "mov %0, %%"REG_c" \n\t"
2279 "mov %1, %%"REG_D" \n\t"
2280 "mov %2, %%"REG_d" \n\t"
2281 "mov %3, %%"REG_b" \n\t"
2282 "xor %%"REG_a", %%"REG_a" \n\t" // i
2283 PREFETCH" (%%"REG_c") \n\t"
2284 PREFETCH" 32(%%"REG_c") \n\t"
2285 PREFETCH" 64(%%"REG_c") \n\t"
99cefd0b 2286
b63f641e 2287#if ARCH_X86_64
6d606c4f 2288
bcdedf67 2289#define CALL_MMX2_FILTER_CODE \
2da0d70d
DB
2290 "movl (%%"REG_b"), %%esi \n\t"\
2291 "call *%4 \n\t"\
2292 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
2293 "add %%"REG_S", %%"REG_c" \n\t"\
2294 "add %%"REG_a", %%"REG_D" \n\t"\
2295 "xor %%"REG_a", %%"REG_a" \n\t"\
6d606c4f
AJ
2296
2297#else
2298
bcdedf67 2299#define CALL_MMX2_FILTER_CODE \
2da0d70d
DB
2300 "movl (%%"REG_b"), %%esi \n\t"\
2301 "call *%4 \n\t"\
2302 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
2303 "add %%"REG_a", %%"REG_D" \n\t"\
2304 "xor %%"REG_a", %%"REG_a" \n\t"\
99cefd0b 2305
bc279024 2306#endif /* ARCH_X86_64 */
6d606c4f 2307
bcdedf67
RP
2308CALL_MMX2_FILTER_CODE
2309CALL_MMX2_FILTER_CODE
2310CALL_MMX2_FILTER_CODE
2311CALL_MMX2_FILTER_CODE
2312CALL_MMX2_FILTER_CODE
2313CALL_MMX2_FILTER_CODE
2314CALL_MMX2_FILTER_CODE
2315CALL_MMX2_FILTER_CODE
2ff198c1 2316
83c89c78 2317#if defined(PIC)
2da0d70d 2318 "mov %5, %%"REG_b" \n\t"
83c89c78 2319#endif
2da0d70d 2320 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
bcdedf67 2321 "m" (mmx2FilterCode)
83c89c78 2322#if defined(PIC)
2da0d70d 2323 ,"m" (ebxsave)
83c89c78 2324#endif
2da0d70d 2325 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
83c89c78 2326#if !defined(PIC)
2da0d70d
DB
2327 ,"%"REG_b
2328#endif
2329 );
2330 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
2331 }
2332 else
2333 {
94daf2e9 2334#endif /* COMPILE_TEMPLATE_MMX2 */
d0ce212a 2335 x86_reg xInc_shr16 = xInc >> 16;
2da0d70d
DB
2336 uint16_t xInc_mask = xInc & 0xffff;
2337 //NO MMX just normal asm ...
7ad6469e 2338 __asm__ volatile(
2da0d70d
DB
2339 "xor %%"REG_a", %%"REG_a" \n\t" // i
2340 "xor %%"REG_d", %%"REG_d" \n\t" // xx
6d08d7f3 2341 "xorl %%ecx, %%ecx \n\t" // xalpha
2da0d70d
DB
2342 ASMALIGN(4)
2343 "1: \n\t"
2344 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
2345 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18c61752 2346 FAST_BILINEAR_X86
2da0d70d 2347 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
6d08d7f3
RP
2348 "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
2349 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
2da0d70d
DB
2350
2351 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
2352 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18c61752 2353 FAST_BILINEAR_X86
2da0d70d 2354 "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
6d08d7f3
RP
2355 "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
2356 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
2da0d70d
DB
2357
2358
2359 "add $2, %%"REG_a" \n\t"
2360 "cmp %2, %%"REG_a" \n\t"
2361 " jb 1b \n\t"
2362
2363
2364 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
2365 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
2366 );
94daf2e9 2367#if COMPILE_TEMPLATE_MMX2
2da0d70d 2368 } //if MMX2 can't be used
2ff198c1
MN
2369#endif
2370#else
40fa5140 2371 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
b63f641e 2372#endif /* ARCH_X86 */
077ea8a7 2373 }
6bc0c792 2374
6858492e 2375 if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
6bc0c792
MN
2376 int i;
 2377 //FIXME all pal and rgb srcFormats could do this conversion as well
2378 //FIXME all scalers more complex than bilinear could do half of this transform
2379 if(c->srcRange){
2380 for (i=0; i<dstWidth; i++)
2381 dst[i]= (dst[i]*14071 + 33561947)>>14;
2382 }else{
2383 for (i=0; i<dstWidth; i++)
aa13b0fc 2384 dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
6bc0c792
MN
2385 }
2386 }
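 /* The constants above appear to implement full-range <-> limited-range
  * (JPEG <-> MPEG) luma conversion on the 15-bit intermediate samples:
  * 14071/16384 ~= 219/255 with an offset of roughly (16<<7)*16384, and
  * 19077/16384 ~= 255/219 for the opposite direction, clipped via FFMIN to
  * avoid overflow.  This is an observation, not a spec quote. */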
2ff198c1
MN
2387}
2388
392b6567 2389static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
7ac40327
RP
2390 int dstWidth, const uint8_t *src1,
2391 const uint8_t *src2, int srcW, int xInc)
392b6567
RP
2392{
2393 int i;
2394 unsigned int xpos=0;
2395 for (i=0;i<dstWidth;i++)
2396 {
2397 register unsigned int xx=xpos>>16;
2398 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2399 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2400 dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2401 /* slower
2402 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
2403 dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
2404 */
2405 xpos+=xInc;
2406 }
2407}
2408
7ac40327
RP
2409inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
2410 int srcW, int xInc, int flags, const int16_t *hChrFilter,
2411 const int16_t *hChrFilterPos, int hChrFilterSize,
95b5770b
RP
2412 int srcFormat, uint8_t *formatConvBuffer,
2413 uint32_t *pal)
2ff198c1 2414{
fdf70cc5
RP
2415 int32_t av_unused *mmx2FilterPos = c->chrMmx2FilterPos;
2416 int16_t av_unused *mmx2Filter = c->chrMmx2Filter;
2417 int av_unused canMMX2BeUsed = c->canMMX2BeUsed;
bcdedf67 2418 void av_unused *mmx2FilterCode= c->chrMmx2FilterCode;
95b5770b 2419
40fa5140 2420 if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE)
2da0d70d 2421 return;
40fa5140 2422
f2671197 2423 if (srcFormat==PIX_FMT_RGB32_1 || srcFormat==PIX_FMT_BGR32_1) {
40fa5140
RP
2424 src1 += ALT32_CORR;
2425 src2 += ALT32_CORR;
6ff0ad6b 2426 }
40fa5140 2427
e8417235
KS
2428 if (srcFormat==PIX_FMT_RGB48LE) {
2429 src1++;
2430 src2++;
2431 }
2432
40fa5140
RP
2433 if (c->hcscale_internal) {
2434 c->hcscale_internal(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
2da0d70d 2435 src1= formatConvBuffer;
8b2fce0d 2436 src2= formatConvBuffer+VOFW;
e28630fc 2437 }
1e621b18 2438
94daf2e9 2439#if COMPILE_TEMPLATE_MMX
8a322796 2440 // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
2da0d70d 2441 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
e3d2500f 2442#else
2da0d70d 2443 if (!(flags&SWS_FAST_BILINEAR))
e3d2500f 2444#endif
077ea8a7 2445 {
40fa5140
RP
2446 c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
2447 c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
077ea8a7 2448 }
8a322796 2449 else // fast bilinear upscale / crap downscale
077ea8a7 2450 {
57f9a560 2451#if ARCH_X86 && CONFIG_GPL
94daf2e9 2452#if COMPILE_TEMPLATE_MMX2
2da0d70d 2453 int i;
83c89c78 2454#if defined(PIC)
934626a9 2455 DECLARE_ALIGNED(8, uint64_t, ebxsave);
83c89c78 2456#endif
2da0d70d
DB
2457 if (canMMX2BeUsed)
2458 {
7ad6469e 2459 __asm__ volatile(
83c89c78 2460#if defined(PIC)
2da0d70d
DB
2461 "mov %%"REG_b", %6 \n\t"
2462#endif
2463 "pxor %%mm7, %%mm7 \n\t"
2464 "mov %0, %%"REG_c" \n\t"
2465 "mov %1, %%"REG_D" \n\t"
2466 "mov %2, %%"REG_d" \n\t"
2467 "mov %3, %%"REG_b" \n\t"
2468 "xor %%"REG_a", %%"REG_a" \n\t" // i
2469 PREFETCH" (%%"REG_c") \n\t"
2470 PREFETCH" 32(%%"REG_c") \n\t"
2471 PREFETCH" 64(%%"REG_c") \n\t"
b7dc6f66 2472
b63f641e 2473#if ARCH_X86_64
6d606c4f 2474
bcdedf67 2475#define CALL_MMX2_FILTER_CODE \
2da0d70d
DB
2476 "movl (%%"REG_b"), %%esi \n\t"\
2477 "call *%4 \n\t"\
2478 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
2479 "add %%"REG_S", %%"REG_c" \n\t"\
2480 "add %%"REG_a", %%"REG_D" \n\t"\
2481 "xor %%"REG_a", %%"REG_a" \n\t"\
6d606c4f
AJ
2482
2483#else
2484
bcdedf67 2485#define CALL_MMX2_FILTER_CODE \
2da0d70d
DB
2486 "movl (%%"REG_b"), %%esi \n\t"\
2487 "call *%4 \n\t"\
2488 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
2489 "add %%"REG_a", %%"REG_D" \n\t"\
2490 "xor %%"REG_a", %%"REG_a" \n\t"\
b7dc6f66 2491
bc279024 2492#endif /* ARCH_X86_64 */
6d606c4f 2493
bcdedf67
RP
2494CALL_MMX2_FILTER_CODE
2495CALL_MMX2_FILTER_CODE
2496CALL_MMX2_FILTER_CODE
2497CALL_MMX2_FILTER_CODE
2da0d70d
DB
2498 "xor %%"REG_a", %%"REG_a" \n\t" // i
2499 "mov %5, %%"REG_c" \n\t" // src
2500 "mov %1, %%"REG_D" \n\t" // buf1
8b2fce0d 2501 "add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t"
2da0d70d
DB
2502 PREFETCH" (%%"REG_c") \n\t"
2503 PREFETCH" 32(%%"REG_c") \n\t"
2504 PREFETCH" 64(%%"REG_c") \n\t"
b7dc6f66 2505
bcdedf67
RP
2506CALL_MMX2_FILTER_CODE
2507CALL_MMX2_FILTER_CODE
2508CALL_MMX2_FILTER_CODE
2509CALL_MMX2_FILTER_CODE
b7dc6f66 2510
83c89c78 2511#if defined(PIC)
2da0d70d 2512 "mov %6, %%"REG_b" \n\t"
83c89c78 2513#endif
2da0d70d 2514 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
bcdedf67 2515 "m" (mmx2FilterCode), "m" (src2)
83c89c78 2516#if defined(PIC)
2da0d70d 2517 ,"m" (ebxsave)
83c89c78 2518#endif
2da0d70d 2519 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
83c89c78 2520#if !defined(PIC)
2da0d70d
DB
2521 ,"%"REG_b
2522#endif
2523 );
2524 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2525 {
2526 //printf("%d %d %d\n", dstWidth, i, srcW);
2527 dst[i] = src1[srcW-1]*128;
8b2fce0d 2528 dst[i+VOFW] = src2[srcW-1]*128;
2da0d70d
DB
2529 }
2530 }
2531 else
2532 {
94daf2e9 2533#endif /* COMPILE_TEMPLATE_MMX2 */
d0ce212a 2534 x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
2da0d70d 2535 uint16_t xInc_mask = xInc & 0xffff;
7ad6469e 2536 __asm__ volatile(
2da0d70d
DB
2537 "xor %%"REG_a", %%"REG_a" \n\t" // i
2538 "xor %%"REG_d", %%"REG_d" \n\t" // xx
6d08d7f3 2539 "xorl %%ecx, %%ecx \n\t" // xalpha
2da0d70d
DB
2540 ASMALIGN(4)
2541 "1: \n\t"
2542 "mov %0, %%"REG_S" \n\t"
2543 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
2544 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
18c61752 2545 FAST_BILINEAR_X86
2da0d70d
DB
2546 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
2547
2548 "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
2549 "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
18c61752 2550 FAST_BILINEAR_X86
8b2fce0d 2551 "movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"
2da0d70d 2552
6d08d7f3
RP
2553 "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
2554 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
2da0d70d
DB
2555 "add $1, %%"REG_a" \n\t"
2556 "cmp %2, %%"REG_a" \n\t"
2557 " jb 1b \n\t"
2ff198c1 2558
8a322796
DB
2559/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
2560 which is needed to support GCC 4.0. */
b63f641e 2561#if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
e29c3f93 2562 :: "m" (src1), "m" (dst), "g" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
dc77ef7f 2563#else
e29c3f93 2564 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
dc77ef7f 2565#endif
2da0d70d
DB
2566 "r" (src2)
2567 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
2568 );
94daf2e9 2569#if COMPILE_TEMPLATE_MMX2
2da0d70d 2570 } //if MMX2 can't be used
2ff198c1
MN
2571#endif
2572#else
40fa5140 2573 c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
b63f641e 2574#endif /* ARCH_X86 */
2da0d70d 2575 }
6bc0c792
MN
2576 if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
2577 int i;
 2578 //FIXME all pal and rgb srcFormats could do this conversion as well
2579 //FIXME all scalers more complex than bilinear could do half of this transform
2580 if(c->srcRange){
2581 for (i=0; i<dstWidth; i++){
2582 dst[i ]= (dst[i ]*1799 + 4081085)>>11; //1469
2583 dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469
2584 }
2585 }else{
2586 for (i=0; i<dstWidth; i++){
aa13b0fc
MN
2587 dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
2588 dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
6bc0c792
MN
2589 }
2590 }
2591 }
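 /* Same observation as for the luma path: 1799/2048 ~= 224/255 and
  * 4663/4096 ~= 255/224, i.e. full-range <-> limited-range chroma conversion
  * (pivoting around 128<<7) on the 15-bit intermediate values, with FFMIN
  * clipping on the expanding direction. */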
077ea8a7
MN
2592}
2593
3e499f53 2594static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2da0d70d
DB
2595 int srcSliceH, uint8_t* dst[], int dstStride[]){
2596
 2597 /* load a few things into local vars to make the code more readable and faster */
2598 const int srcW= c->srcW;
2599 const int dstW= c->dstW;
2600 const int dstH= c->dstH;
2601 const int chrDstW= c->chrDstW;
2602 const int chrSrcW= c->chrSrcW;
2603 const int lumXInc= c->lumXInc;
2604 const int chrXInc= c->chrXInc;
2605 const int dstFormat= c->dstFormat;
2606 const int srcFormat= c->srcFormat;
2607 const int flags= c->flags;
2da0d70d
DB
2608 int16_t *vLumFilterPos= c->vLumFilterPos;
2609 int16_t *vChrFilterPos= c->vChrFilterPos;
2610 int16_t *hLumFilterPos= c->hLumFilterPos;
2611 int16_t *hChrFilterPos= c->hChrFilterPos;
2612 int16_t *vLumFilter= c->vLumFilter;
2613 int16_t *vChrFilter= c->vChrFilter;
2614 int16_t *hLumFilter= c->hLumFilter;
2615 int16_t *hChrFilter= c->hChrFilter;
2616 int32_t *lumMmxFilter= c->lumMmxFilter;
2617 int32_t *chrMmxFilter= c->chrMmxFilter;
6858492e 2618 int32_t *alpMmxFilter= c->alpMmxFilter;
2da0d70d
DB
2619 const int vLumFilterSize= c->vLumFilterSize;
2620 const int vChrFilterSize= c->vChrFilterSize;
2621 const int hLumFilterSize= c->hLumFilterSize;
2622 const int hChrFilterSize= c->hChrFilterSize;
2623 int16_t **lumPixBuf= c->lumPixBuf;
2624 int16_t **chrPixBuf= c->chrPixBuf;
6858492e 2625 int16_t **alpPixBuf= c->alpPixBuf;
2da0d70d
DB
2626 const int vLumBufSize= c->vLumBufSize;
2627 const int vChrBufSize= c->vChrBufSize;
2da0d70d
DB
2628 uint8_t *formatConvBuffer= c->formatConvBuffer;
2629 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2630 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2631 int lastDstY;
e150ef8d 2632 uint32_t *pal=c->pal_yuv;
2da0d70d 2633
8a322796 2634 /* vars which will change and which we need to store back in the context */
2da0d70d
DB
2635 int dstY= c->dstY;
2636 int lumBufIndex= c->lumBufIndex;
2637 int chrBufIndex= c->chrBufIndex;
2638 int lastInLumBuf= c->lastInLumBuf;
2639 int lastInChrBuf= c->lastInChrBuf;
2640
2641 if (isPacked(c->srcFormat)){
2da0d70d
DB
2642 src[0]=
2643 src[1]=
6858492e
CS
2644 src[2]=
2645 src[3]= src[0];
2da0d70d
DB
2646 srcStride[0]=
2647 srcStride[1]=
6858492e
CS
2648 srcStride[2]=
2649 srcStride[3]= srcStride[0];
2da0d70d
DB
2650 }
2651 srcStride[1]<<= c->vChrDrop;
2652 srcStride[2]<<= c->vChrDrop;
2653
2654 //printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
2655 // (int)dst[0], (int)dst[1], (int)dst[2]);
c7a810cc 2656
2da0d70d
DB
2657 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
2658 //dstStride[0],dstStride[1],dstStride[2]);
2659
6858492e 2660 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0)
2da0d70d 2661 {
6683a37f
DP
2662 static int warnedAlready=0; //FIXME move this into the context perhaps
2663 if (flags & SWS_PRINT_INFO && !warnedAlready)
2da0d70d 2664 {
4b0c30b7 2665 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
8a322796 2666 " ->cannot do aligned memory accesses anymore\n");
6683a37f 2667 warnedAlready=1;
2da0d70d
DB
2668 }
2669 }
2670
8a322796
DB
 2671 /* Note: the user might start scaling the picture in the middle, so this
 2672 will not get executed. This is not really intended but it works
 2673 currently, so people might do it. */
2da0d70d
DB
2674 if (srcSliceY ==0){
2675 lumBufIndex=0;
2676 chrBufIndex=0;
2677 dstY=0;
2678 lastInLumBuf= -1;
2679 lastInChrBuf= -1;
2680 }
2681
2682 lastDstY= dstY;
2683
2684 for (;dstY < dstH; dstY++){
2685 unsigned char *dest =dst[0]+dstStride[0]*dstY;
2686 const int chrDstY= dstY>>c->chrDstVSubSample;
2687 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
2688 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
6858492e 2689 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
2da0d70d
DB
2690
2691 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2692 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
fb91df39
RP
2693 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2694 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2695 int enough_lines;
2da0d70d
DB
2696
2697 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
2698 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
2699 //handle holes (FAST_BILINEAR & weird filters)
2700 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2701 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2702 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
fcc402b1
LB
2703 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2704 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2da0d70d
DB
2705
2706 // Do we have enough lines in this slice to output the dstY line
fb91df39
RP
2707 enough_lines = lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2708 if (!enough_lines) {
2709 lastLumSrcY = srcSliceY + srcSliceH - 1;
2710 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2711 }
2712
cbdc08d5
RP
2713 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
2714 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
2715 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
2716 vChrBufSize, vLumBufSize);*/
fb91df39 2717
cbdc08d5
RP
2718 //Do horizontal scaling
2719 while(lastInLumBuf < lastLumSrcY)
2720 {
2721 uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2722 uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2723 lumBufIndex++;
2724 //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
2725 assert(lumBufIndex < 2*vLumBufSize);
2726 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2727 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2728 //printf("%d %d\n", lumBufIndex, vLumBufSize);
2729 RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2730 flags, hLumFilter, hLumFilterPos, hLumFilterSize,
2731 c->srcFormat, formatConvBuffer,
2732 pal, 0);
2733 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2734 RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
95b5770b
RP
2735 flags, hLumFilter, hLumFilterPos, hLumFilterSize,
2736 c->srcFormat, formatConvBuffer,
cbdc08d5
RP
2737 pal, 1);
2738 lastInLumBuf++;
2739 }
2740 while(lastInChrBuf < lastChrSrcY)
2741 {
2742 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2743 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2744 chrBufIndex++;
2745 assert(chrBufIndex < 2*vChrBufSize);
2746 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2747 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2748 //FIXME replace parameters through context struct (some at least)
2749
2750 if (!(isGray(srcFormat) || isGray(dstFormat)))
2751 RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
2752 flags, hChrFilter, hChrFilterPos, hChrFilterSize,
2753 c->srcFormat, formatConvBuffer,
2754 pal);
2755 lastInChrBuf++;
2756 }
2757 //wrap buf index around to stay inside the ring buffer
2758 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2759 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
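 /* Presumably lumPixBuf/chrPixBuf are ring buffers of vLumBufSize/vChrBufSize
  * scanlines whose pointer arrays are duplicated to twice that length, so that
  * lumSrcPtr/chrSrcPtr below can always address vLumFilterSize/vChrFilterSize
  * consecutive entries without a second wrap (see the asserts against
  * vLumBufSize*2 / vChrBufSize*2 further down). */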
2760 if (!enough_lines)
2da0d70d 2761 break; //we can't output a dstY line so let's try with the next slice
d3f41512 2762
94daf2e9 2763#if COMPILE_TEMPLATE_MMX
88e2a9ae 2764 c->blueDither= ff_dither8[dstY&1];
92c7b471 2765 if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
88e2a9ae 2766 c->greenDither= ff_dither8[dstY&1];
92c7b471 2767 else
88e2a9ae
CEH
2768 c->greenDither= ff_dither4[dstY&1];
2769 c->redDither= ff_dither8[(dstY+1)&1];
2da0d70d
DB
2770#endif
2771 if (dstY < dstH-2)
2772 {
7ac40327
RP
2773 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2774 const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2775 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
94daf2e9 2776#if COMPILE_TEMPLATE_MMX
2da0d70d
DB
2777 int i;
2778 if (flags & SWS_ACCURATE_RND){
1625216e 2779 int s= APCK_SIZE / 8;
2da0d70d 2780 for (i=0; i<vLumFilterSize; i+=2){
1625216e
MN
2781 *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
2782 *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
2783 lumMmxFilter[s*i+APCK_COEF/4 ]=
2784 lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
2da0d70d 2785 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
6858492e
CS
2786 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2787 *(void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
2788 *(void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
2789 alpMmxFilter[s*i+APCK_COEF/4 ]=
2790 alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
2791 }
2da0d70d
DB
2792 }
2793 for (i=0; i<vChrFilterSize; i+=2){
1625216e
MN
2794 *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
2795 *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
2796 chrMmxFilter[s*i+APCK_COEF/4 ]=
2797 chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
2da0d70d 2798 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
bca11e75 2799 }
2da0d70d
DB
2800 }else{
2801 for (i=0; i<vLumFilterSize; i++)
2802 {
2803 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
2804 lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
2805 lumMmxFilter[4*i+2]=
2806 lumMmxFilter[4*i+3]=
2807 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
6858492e
CS
2808 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2809 alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
2810 alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
2811 alpMmxFilter[4*i+2]=
2812 alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
2813 }
2da0d70d
DB
2814 }
2815 for (i=0; i<vChrFilterSize; i++)
2816 {
2817 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
2818 chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
2819 chrMmxFilter[4*i+2]=
2820 chrMmxFilter[4*i+3]=
2821 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
2822 }
2823 }
6542b44e 2824#endif
2da0d70d
DB
2825 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
2826 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2827 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
40fa5140 2828 c->yuv2nv12X(c,
2da0d70d
DB
2829 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2830 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2831 dest, uDest, dstW, chrDstW, dstFormat);
e3d2500f 2832 }
b0880d5d 2833 else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12 like
2da0d70d
DB
2834 {
2835 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2836 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
8a322796 2837 if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12
2da0d70d
DB
2838 {
2839 int16_t *lumBuf = lumPixBuf[0];
2840 int16_t *chrBuf= chrPixBuf[0];
6858492e 2841 int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
40fa5140 2842 c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
2da0d70d
DB
2843 }
2844 else //General YV12
2845 {
40fa5140 2846 c->yuv2yuvX(c,
2da0d70d
DB
2847 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2848 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2849 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2da0d70d
DB
2850 }
2851 }
2852 else
2853 {
fcc402b1
LB
2854 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2855 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
8a322796 2856 if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB
2da0d70d
DB
2857 {
2858 int chrAlpha= vChrFilter[2*dstY+1];
f0faee4c
MN
2859 if(flags & SWS_FULL_CHR_H_INT){
2860 yuv2rgbXinC_full(c, //FIXME write a packed1_full function
2861 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2862 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2863 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2864 }else{
40fa5140 2865 c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
6858492e 2866 alpPixBuf ? *alpSrcPtr : NULL,
14014d47 2867 dest, dstW, chrAlpha, dstFormat, flags, dstY);
f0faee4c 2868 }
2da0d70d 2869 }
8a322796 2870 else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
2da0d70d
DB
2871 {
2872 int lumAlpha= vLumFilter[2*dstY+1];
2873 int chrAlpha= vChrFilter[2*dstY+1];
2874 lumMmxFilter[2]=
2875 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
2876 chrMmxFilter[2]=
2877 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
f0faee4c
MN
2878 if(flags & SWS_FULL_CHR_H_INT){
2879 yuv2rgbXinC_full(c, //FIXME write a packed2_full function
2880 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2881 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2882 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2883 }else{
40fa5140 2884 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
6858492e 2885 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
14014d47 2886 dest, dstW, lumAlpha, chrAlpha, dstY);
f0faee4c 2887 }
2da0d70d 2888 }
8a322796 2889 else //general RGB
2da0d70d 2890 {
f0faee4c
MN
2891 if(flags & SWS_FULL_CHR_H_INT){
2892 yuv2rgbXinC_full(c,
2893 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2894 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2895 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2896 }else{
40fa5140 2897 c->yuv2packedX(c,
14014d47
MN
2898 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2899 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2900 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2901 }
2da0d70d
DB
2902 }
2903 }
2904 }
2905 else // hmm looks like we can't use MMX here without overwriting this array's tail
2906 {
7ac40327
RP
2907 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2908 const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2909 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2da0d70d
DB
2910 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
2911 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2912 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
2913 yuv2nv12XinC(
2914 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2915 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2916 dest, uDest, dstW, chrDstW, dstFormat);
2917 }
b0880d5d 2918 else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12
2da0d70d
DB
2919 {
2920 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2921 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
2922 yuv2yuvXinC(
2923 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2924 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2925 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2da0d70d
DB
2926 }
2927 else
2928 {
fcc402b1
LB
2929 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2930 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
f0faee4c
MN
2931 if(flags & SWS_FULL_CHR_H_INT){
2932 yuv2rgbXinC_full(c,
2933 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2934 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2935 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2936 }else{
14014d47
MN
2937 yuv2packedXinC(c,
2938 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2939 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
6858492e 2940 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2941 }
2da0d70d
DB
2942 }
2943 }
2944 }
17f715fa 2945
6268f55b
CS
2946 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2947 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2948
94daf2e9 2949#if COMPILE_TEMPLATE_MMX
5b7c7dd3
RP
2950 if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory");
2951 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
2952 if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory");
2953 else __asm__ volatile("emms" :::"memory");
17f715fa 2954#endif
2da0d70d
DB
2955 /* store changed local vars back in the context */
2956 c->dstY= dstY;
2957 c->lumBufIndex= lumBufIndex;
2958 c->chrBufIndex= chrBufIndex;
2959 c->lastInLumBuf= lastInLumBuf;
2960 c->lastInChrBuf= lastInChrBuf;
d4e24275 2961
2da0d70d 2962 return dstY - lastDstY;
627690b5 2963}
40fa5140
RP
2964
2965static void RENAME(sws_init_swScale)(SwsContext *c)
2966{
2967 enum PixelFormat srcFormat = c->srcFormat;
2968
2969 c->yuv2nv12X = RENAME(yuv2nv12X );
2970 c->yuv2yuv1 = RENAME(yuv2yuv1 );
2971 c->yuv2yuvX = RENAME(yuv2yuvX );
2972 c->yuv2packed1 = RENAME(yuv2packed1 );
2973 c->yuv2packed2 = RENAME(yuv2packed2 );
2974 c->yuv2packedX = RENAME(yuv2packedX );
2975
2976 c->hScale = RENAME(hScale );
2977
2978 c->hyscale_fast = RENAME(hyscale_fast);
2979 c->hcscale_fast = RENAME(hcscale_fast);
2980
2981 c->hcscale_internal = NULL;
2982 switch(srcFormat) {
2983 case PIX_FMT_YUYV422 : c->hcscale_internal = RENAME(yuy2ToUV); break;
2984 case PIX_FMT_UYVY422 : c->hcscale_internal = RENAME(uyvyToUV); break;
2985 case PIX_FMT_RGB8 :
2986 case PIX_FMT_BGR8 :
2987 case PIX_FMT_PAL8 :
2988 case PIX_FMT_BGR4_BYTE:
80704c47 2989 case PIX_FMT_RGB4_BYTE: c->hcscale_internal = palToUV; break;
de1275d5
MN
2990 case PIX_FMT_YUV420PBE:
2991 case PIX_FMT_YUV422PBE:
2992 case PIX_FMT_YUV444PBE: c->hcscale_internal = RENAME(BEToUV); break;
2993 case PIX_FMT_YUV420PLE:
2994 case PIX_FMT_YUV422PLE:
2995 case PIX_FMT_YUV444PLE: c->hcscale_internal = RENAME(LEToUV); break;
40fa5140
RP
2996 }
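// When chroma is read at half horizontal resolution (chrSrcHSubSample), the
// *ToUV_half readers average two adjacent source pixels per chroma sample;
// otherwise every source pixel is read.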
2997 if (c->chrSrcHSubSample) {
2998 switch(srcFormat) {
e8417235
KS
2999 case PIX_FMT_RGB48BE:
3000 case PIX_FMT_RGB48LE: c->hcscale_internal = rgb48ToUV_half; break;
40fa5140 3001 case PIX_FMT_RGB32 :
80704c47 3002 case PIX_FMT_RGB32_1: c->hcscale_internal = bgr32ToUV_half; break;
40fa5140 3003 case PIX_FMT_BGR24 : c->hcscale_internal = RENAME(bgr24ToUV_half); break;
80704c47
KS
3004 case PIX_FMT_BGR565 : c->hcscale_internal = bgr16ToUV_half; break;
3005 case PIX_FMT_BGR555 : c->hcscale_internal = bgr15ToUV_half; break;
40fa5140 3006 case PIX_FMT_BGR32 :
80704c47 3007 case PIX_FMT_BGR32_1: c->hcscale_internal = rgb32ToUV_half; break;
40fa5140 3008 case PIX_FMT_RGB24 : c->hcscale_internal = RENAME(rgb24ToUV_half); break;
80704c47
KS
3009 case PIX_FMT_RGB565 : c->hcscale_internal = rgb16ToUV_half; break;
3010 case PIX_FMT_RGB555 : c->hcscale_internal = rgb15ToUV_half; break;
40fa5140
RP
3011 }
3012 } else {
3013 switch(srcFormat) {
e8417235
KS
3014 case PIX_FMT_RGB48BE:
3015 case PIX_FMT_RGB48LE: c->hcscale_internal = rgb48ToUV; break;
40fa5140 3016 case PIX_FMT_RGB32 :
80704c47 3017 case PIX_FMT_RGB32_1: c->hcscale_internal = bgr32ToUV; break;
40fa5140 3018 case PIX_FMT_BGR24 : c->hcscale_internal = RENAME(bgr24ToUV); break;
80704c47
KS
3019 case PIX_FMT_BGR565 : c->hcscale_internal = bgr16ToUV; break;
3020 case PIX_FMT_BGR555 : c->hcscale_internal = bgr15ToUV; break;
40fa5140 3021 case PIX_FMT_BGR32 :
80704c47 3022 case PIX_FMT_BGR32_1: c->hcscale_internal = rgb32ToUV; break;
40fa5140 3023 case PIX_FMT_RGB24 : c->hcscale_internal = RENAME(rgb24ToUV); break;
80704c47
KS
3024 case PIX_FMT_RGB565 : c->hcscale_internal = rgb16ToUV; break;
3025 case PIX_FMT_RGB555 : c->hcscale_internal = rgb15ToUV; break;
40fa5140
RP
3026 }
3027 }
3028
3029 c->hyscale_internal = NULL;
39e5f87b 3030 c->hascale_internal = NULL;
40fa5140
RP
3031 switch (srcFormat) {
3032 case PIX_FMT_YUYV422 :
de1275d5
MN
3033 case PIX_FMT_YUV420PBE:
3034 case PIX_FMT_YUV422PBE:
3035 case PIX_FMT_YUV444PBE:
40fa5140
RP
3036 case PIX_FMT_GRAY16BE : c->hyscale_internal = RENAME(yuy2ToY); break;
3037 case PIX_FMT_UYVY422 :
de1275d5
MN
3038 case PIX_FMT_YUV420PLE:
3039 case PIX_FMT_YUV422PLE:
3040 case PIX_FMT_YUV444PLE:
40fa5140
RP
3041 case PIX_FMT_GRAY16LE : c->hyscale_internal = RENAME(uyvyToY); break;
3042 case PIX_FMT_BGR24 : c->hyscale_internal = RENAME(bgr24ToY); break;
80704c47
KS
3043 case PIX_FMT_BGR565 : c->hyscale_internal = bgr16ToY; break;
3044 case PIX_FMT_BGR555 : c->hyscale_internal = bgr15ToY; break;
40fa5140 3045 case PIX_FMT_RGB24 : c->hyscale_internal = RENAME(rgb24ToY); break;
80704c47
KS
3046 case PIX_FMT_RGB565 : c->hyscale_internal = rgb16ToY; break;
3047 case PIX_FMT_RGB555 : c->hyscale_internal = rgb15ToY; break;
40fa5140
RP
3048 case PIX_FMT_RGB8 :
3049 case PIX_FMT_BGR8 :
3050 case PIX_FMT_PAL8 :
3051 case PIX_FMT_BGR4_BYTE:
80704c47
KS
3052 case PIX_FMT_RGB4_BYTE: c->hyscale_internal = palToY; break;
3053 case PIX_FMT_MONOBLACK: c->hyscale_internal = monoblack2Y; break;
3054 case PIX_FMT_MONOWHITE: c->hyscale_internal = monowhite2Y; break;
39e5f87b 3055 case PIX_FMT_RGB32 :
80704c47 3056 case PIX_FMT_RGB32_1: c->hyscale_internal = bgr32ToY; break;
39e5f87b 3057 case PIX_FMT_BGR32 :
80704c47 3058 case PIX_FMT_BGR32_1: c->hyscale_internal = rgb32ToY; break;
e8417235
KS
3059 case PIX_FMT_RGB48BE:
3060 case PIX_FMT_RGB48LE: c->hyscale_internal = rgb48ToY; break;
40fa5140
RP
3061 }
3062 if (c->alpPixBuf) {
3063 switch (srcFormat) {
3064 case PIX_FMT_RGB32 :
3065 case PIX_FMT_RGB32_1:
3066 case PIX_FMT_BGR32 :
80704c47 3067 case PIX_FMT_BGR32_1: c->hascale_internal = abgrToA; break;
40fa5140
RP
3068 }
3069 }
3070}
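/*
 * Illustrative, stand-alone toy (compile it as its own file, not as part of
 * this one): the RENAME() token-pasting trick used above.  In the real build,
 * swscale.c defines RENAME() and the COMPILE_TEMPLATE_* macros and #includes
 * this template once per CPU flavour, so sws_init_swScale() becomes a
 * per-flavour entry point such as sws_init_swScale_MMX2().  The names below
 * (init_c, init_mmx2) exist only in this toy.
 */
#include <stdio.h>

#define RENAME(a) a ## _c
static void RENAME(init)(void) { puts("plain C variant"); }   // expands to init_c()
#undef RENAME

#define RENAME(a) a ## _mmx2
static void RENAME(init)(void) { puts("MMX2 variant"); }      // expands to init_mmx2()
#undef RENAME

int main(void)
{
    init_c();
    init_mmx2();
    return 0;
}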