More indentation changes leftover from r29522:
[libav.git] / libswscale / swscale_template.c
CommitLineData
/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * The C code (not assembly, MMX, ...) of this file can be used
 * under the LGPL license.
 */

/*
 * Per-template instruction selection.
 *
 * Each macro is #undef'd first and then defined again according to the
 * COMPILE_TEMPLATE_* switches, so the definitions here always replace any
 * earlier ones.
 */
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#undef PREFETCHW

/*
 * PREFETCH / PREFETCHW: mnemonic strings for the 3DNow! and MMX2 templates.
 * Every other template gets " # nop" (presumably an assembler comment, so
 * that the resulting line assembles to nothing).
 */
#if COMPILE_TEMPLATE_AMD3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW "prefetchw"
#elif COMPILE_TEMPLATE_MMX2
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#else
#define PREFETCH " # nop"
#define PREFETCHW " # nop"
#endif

/*
 * PAVGB(a,b): one instruction line using pavgb (MMX2) or pavgusb (3DNow!).
 * It is left undefined for every other template.
 */
#if COMPILE_TEMPLATE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif COMPILE_TEMPLATE_AMD3DNOW
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif

/*
 * MOVNTQ(a,b): 64-bit store line; movntq on MMX2, plain movq otherwise.
 * The REAL_ indirection makes the preprocessor macro-expand the arguments
 * (e.g. a register-name macro) before they are stringified.
 */
#if COMPILE_TEMPLATE_MMX2
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)

#if COMPILE_TEMPLATE_ALTIVEC
#include "ppc/swscale_altivec_template.c"
#endif

/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * Complete inline-asm statement (the expansion ends in ';').
 *   x      - string literal: displacement added to every source address
 *   offset - string literal: displacement from operand %0 to the filter list
 *   dest   - output pointer, bound to %1
 *   width  - loop bound, bound to %2
 * Operand %0 is &c->redDither, so a variable named 'c' must be in scope at
 * the expansion site.
 *
 * For each group of 8 outputs the code walks a list of 16-byte entries
 * (source pointer at +0, coefficient at +8) that ends at a null pointer.
 * It accumulates pmulhw(src, coeff) on top of the rounder read from
 * VROUNDER_OFFSET(%0), shifts right by 3, packs to unsigned bytes and
 * stores 8 bytes at dest + REG_a.  REG_a then advances by 8 until it is no
 * longer (unsigned) below width.
 *
 * The asm writes through 'dest', which the compiler cannot see from the
 * "r" constraint, hence the "memory" clobber.
 */
#define YSCALEYUV2YV12X(x, offset, dest, width) \
    __asm__ volatile(\
        "xor %%"REG_a", %%"REG_a" \n\t"\
        "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
        "movq %%mm3, %%mm4 \n\t"\
        "lea " offset "(%0), %%"REG_d" \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        ASMALIGN(4) /* FIXME Unroll? */\
        "1: \n\t"\
        "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
        "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
        "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* srcData */\
        "add $16, %%"REG_d" \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        "test %%"REG_S", %%"REG_S" \n\t"\
        /* the MMX ops below leave the flags set by 'test' untouched */\
        "pmulhw %%mm0, %%mm2 \n\t"\
        "pmulhw %%mm0, %%mm5 \n\t"\
        "paddw %%mm2, %%mm3 \n\t"\
        "paddw %%mm5, %%mm4 \n\t"\
        " jnz 1b \n\t"\
        "psraw $3, %%mm3 \n\t"\
        "psraw $3, %%mm4 \n\t"\
        "packuswb %%mm4, %%mm3 \n\t"\
        MOVNTQ(%%mm3, (%1, %%REGa))\
        "add $8, %%"REG_a" \n\t"\
        "cmp %2, %%"REG_a" \n\t"\
        /* re-arm accumulators and list pointer; mov/lea/movq keep the cmp flags */\
        "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
        "movq %%mm3, %%mm4 \n\t"\
        "lea " offset "(%0), %%"REG_d" \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        "jb 1b \n\t"\
        :: "r" (&c->redDither),\
           "r" (dest), "g" (width)\
        : "%"REG_a, "%"REG_d, "%"REG_S, "memory"\
    );

/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same parameters and operands as YSCALEYUV2YV12X (complete asm statement,
 * %0 = &c->redDither, %1 = dest, %2 = width), but with 32-bit accumulation.
 *
 * Each list step of APCK_SIZE bytes supplies two source pointers (at +0 and
 * +APCK_PTR2) and one coefficient quadword (at +APCK_COEF).  Words from the
 * two sources are interleaved and multiplied/added with pmaddwd into the
 * dword accumulators mm4..mm7.  After the list ends (null pointer) the sums
 * are shifted right by 16, packed to words, the rounder from
 * VROUNDER_OFFSET(%0) is added, and the result is shifted right by 3 and
 * packed to 8 unsigned bytes stored at dest + REG_a.
 *
 * The asm writes through 'dest', which the compiler cannot see from the
 * "r" constraint, hence the "memory" clobber.
 */
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
    __asm__ volatile(\
        "lea " offset "(%0), %%"REG_d" \n\t"\
        "xor %%"REG_a", %%"REG_a" \n\t"\
        "pxor %%mm4, %%mm4 \n\t"\
        "pxor %%mm5, %%mm5 \n\t"\
        "pxor %%mm6, %%mm6 \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        ASMALIGN(4) \
        "1: \n\t"\
        "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
        "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
        "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
        "movq %%mm0, %%mm3 \n\t"\
        "punpcklwd %%mm1, %%mm0 \n\t"\
        "punpckhwd %%mm1, %%mm3 \n\t"\
        "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm3 \n\t"\
        "paddd %%mm0, %%mm4 \n\t"\
        "paddd %%mm3, %%mm5 \n\t"\
        "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
        "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
        "test %%"REG_S", %%"REG_S" \n\t"\
        /* the MMX ops below leave the flags set by 'test' untouched */\
        "movq %%mm2, %%mm0 \n\t"\
        "punpcklwd %%mm3, %%mm2 \n\t"\
        "punpckhwd %%mm3, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm2 \n\t"\
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "paddd %%mm2, %%mm6 \n\t"\
        "paddd %%mm0, %%mm7 \n\t"\
        " jnz 1b \n\t"\
        "psrad $16, %%mm4 \n\t"\
        "psrad $16, %%mm5 \n\t"\
        "psrad $16, %%mm6 \n\t"\
        "psrad $16, %%mm7 \n\t"\
        "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
        "packssdw %%mm5, %%mm4 \n\t"\
        "packssdw %%mm7, %%mm6 \n\t"\
        "paddw %%mm0, %%mm4 \n\t"\
        "paddw %%mm0, %%mm6 \n\t"\
        "psraw $3, %%mm4 \n\t"\
        "psraw $3, %%mm6 \n\t"\
        "packuswb %%mm6, %%mm4 \n\t"\
        MOVNTQ(%%mm4, (%1, %%REGa))\
        "add $8, %%"REG_a" \n\t"\
        "cmp %2, %%"REG_a" \n\t"\
        /* re-arm list pointer and accumulators; lea/pxor/mov keep the cmp flags */\
        "lea " offset "(%0), %%"REG_d" \n\t"\
        "pxor %%mm4, %%mm4 \n\t"\
        "pxor %%mm5, %%mm5 \n\t"\
        "pxor %%mm6, %%mm6 \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        "jb 1b \n\t"\
        :: "r" (&c->redDither),\
           "r" (dest), "g" (width)\
        : "%"REG_a, "%"REG_d, "%"REG_S, "memory"\
    );

/*
 * YSCALEYUV2YV121
 *
 * Asm string fragment (no __asm__ wrapper, no operand list).
 * Starting with REG_a = %2, each iteration loads 16 bytes (8 words) from
 * (%0 + 2*REG_a), shifts every word right by 7, packs them to unsigned
 * bytes and stores 8 bytes at (%1 + REG_a).  REG_a advances by 8 and the
 * loop repeats until that add produces a carry.
 */
#define YSCALEYUV2YV121 \
    "mov %2, %%"REG_a" \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1: \n\t"\
    "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
    "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
    "psraw $7, %%mm0 \n\t"\
    "psraw $7, %%mm1 \n\t"\
    "packuswb %%mm1, %%mm0 \n\t"\
    MOVNTQ(%%mm0, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "jnc 1b \n\t"

/*
 * YSCALEYUV2YV121_ACCURATE
 *
 * Same loop as YSCALEYUV2YV121, but a rounding constant is added (with
 * signed saturation) before the shift by 7.  The constant is built in mm7:
 * all-ones words, logical shift right by 15 (-> 1), shift left by 6 (-> 64).
 */
#define YSCALEYUV2YV121_ACCURATE \
    "mov %2, %%"REG_a" \n\t"\
    "pcmpeqw %%mm7, %%mm7 \n\t"\
    "psrlw $15, %%mm7 \n\t"\
    "psllw $6, %%mm7 \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1: \n\t"\
    "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
    "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
    "paddsw %%mm7, %%mm0 \n\t"\
    "paddsw %%mm7, %%mm1 \n\t"\
    "psraw $7, %%mm0 \n\t"\
    "psraw $7, %%mm1 \n\t"\
    "packuswb %%mm1, %%mm0 \n\t"\
    MOVNTQ(%%mm0, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "jnc 1b \n\t"

/*
    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
       "r" (dest), "m" (dstW),
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
/*
 * YSCALEYUV2PACKEDX_UV
 *
 * OPENS an "__asm__ volatile(" statement and leaves it open; further asm
 * fragments and finally an operand list must follow to close it.
 *
 * Clears REG_a, then at label 1 walks the 16-byte filter entries found at
 * CHR_MMX_FILTER_OFFSET(%0) (source pointer at +0, coefficient at +8, list
 * ends at a null pointer).  Starting from the rounder at VROUNDER_OFFSET(%0)
 * it accumulates pmulhw(src, coeff):
 *   mm3 <- samples at (src + REG_a)        (U)
 *   mm4 <- samples at (src + REG_a + VOF)  (V)
 * Label 1 is left as the branch target for the code that follows.
 */
#define YSCALEYUV2PACKEDX_UV \
    __asm__ volatile(\
        "xor %%"REG_a", %%"REG_a" \n\t"\
        ASMALIGN(4)\
        "nop \n\t"\
        "1: \n\t"\
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
        "movq %%mm3, %%mm4 \n\t"\
        ASMALIGN(4)\
        "2: \n\t"\
        "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
        "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
        "add $16, %%"REG_d" \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        "pmulhw %%mm0, %%mm2 \n\t"\
        "pmulhw %%mm0, %%mm5 \n\t"\
        "paddw %%mm2, %%mm3 \n\t"\
        "paddw %%mm5, %%mm4 \n\t"\
        "test %%"REG_S", %%"REG_S" \n\t"\
        " jnz 2b \n\t"

/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
 *
 * Asm string fragment: one vertical filter pass over a 16-byte-entry list.
 *   offset       - string literal: displacement from %0 to the filter list
 *   coeff        - scratch MMX register that receives each coefficient
 *   src1, src2   - scratch MMX registers for the two source quadwords
 *   dst1, dst2   - MMX accumulators, initialised from VROUNDER_OFFSET(%0)
 * The register arguments are stringified as written (e.g. %%mm0).
 *
 * Sources are read at (ptr + 2*REG_a) and 8 bytes further; the list ends at
 * a null source pointer.  REG_d and REG_S are overwritten; REG_a is only read.
 */
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
    "lea "offset"(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
    "movq "#dst1", "#dst2" \n\t"\
    ASMALIGN(4)\
    "2: \n\t"\
    "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\
    "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\
    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\
    "add $16, %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "pmulhw "#coeff", "#src1" \n\t"\
    "pmulhw "#coeff", "#src2" \n\t"\
    "paddw "#src1", "#dst1" \n\t"\
    "paddw "#src2", "#dst2" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t"\
    " jnz 2b \n\t"

/*
 * YSCALEYUV2PACKEDX
 *
 * Chroma pass (YSCALEYUV2PACKEDX_UV, which opens the asm statement) followed
 * by the luma pass over LUM_MMX_FILTER_OFFSET, with mm0 as coefficient
 * register, mm2/mm5 as sources and mm1/mm7 as accumulators.  The asm
 * statement is still open after this macro.
 */
#define YSCALEYUV2PACKEDX \
    YSCALEYUV2PACKEDX_UV \
    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7)

/*
 * YSCALEYUV2PACKEDX_END
 *
 * Operand list and closing ");" for an asm statement opened by one of the
 * YSCALEYUV2PACKEDX*_UV macros.  Operand numbering:
 *   %0 = &c->redDither   %1..%3 = dummy (placeholders)
 *   %4 = dest            %5 = dstW
 * 'c', 'dummy', 'dest' and 'dstW' must be in scope at the expansion site.
 *
 * The "memory" clobber is there because the asm fragments placed before
 * this macro store to memory the compiler cannot see from the operand
 * list (e.g. the U_TEMP/V_TEMP slots relative to %0).
 */
#define YSCALEYUV2PACKEDX_END \
        :: "r" (&c->redDither), \
           "m" (dummy), "m" (dummy), "m" (dummy),\
           "r" (dest), "m" (dstW) \
        : "%"REG_a, "%"REG_d, "%"REG_S, "memory" \
    );

/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV
 *
 * OPENS an "__asm__ volatile(" statement and leaves it open.
 *
 * Chroma vertical filter with 32-bit accumulation.  Each list step of
 * APCK_SIZE bytes at CHR_MMX_FILTER_OFFSET(%0) supplies two source pointers
 * (+0 and +APCK_PTR2) and a coefficient quadword (+APCK_COEF).  Samples at
 * (src + REG_a) and (src + REG_a + VOF) are interleaved pairwise and summed
 * with pmaddwd into mm4/mm5 and mm6/mm7.  After the list ends the sums are
 * shifted right by 16, packed to words, the rounder from VROUNDER_OFFSET(%0)
 * is added, and the two results are stored to U_TEMP(%0) and V_TEMP(%0).
 */
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
    __asm__ volatile(\
        "xor %%"REG_a", %%"REG_a" \n\t"\
        ASMALIGN(4)\
        "nop \n\t"\
        "1: \n\t"\
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        "pxor %%mm4, %%mm4 \n\t"\
        "pxor %%mm5, %%mm5 \n\t"\
        "pxor %%mm6, %%mm6 \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        ASMALIGN(4)\
        "2: \n\t"\
        "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
        "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
        "movq %%mm0, %%mm3 \n\t"\
        "punpcklwd %%mm1, %%mm0 \n\t"\
        "punpckhwd %%mm1, %%mm3 \n\t"\
        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm3 \n\t"\
        "paddd %%mm0, %%mm4 \n\t"\
        "paddd %%mm3, %%mm5 \n\t"\
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
        "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
        "test %%"REG_S", %%"REG_S" \n\t"\
        /* the MMX ops below leave the flags set by 'test' untouched */\
        "movq %%mm2, %%mm0 \n\t"\
        "punpcklwd %%mm3, %%mm2 \n\t"\
        "punpckhwd %%mm3, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm2 \n\t"\
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "paddd %%mm2, %%mm6 \n\t"\
        "paddd %%mm0, %%mm7 \n\t"\
        " jnz 2b \n\t"\
        "psrad $16, %%mm4 \n\t"\
        "psrad $16, %%mm5 \n\t"\
        "psrad $16, %%mm6 \n\t"\
        "psrad $16, %%mm7 \n\t"\
        "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
        "packssdw %%mm5, %%mm4 \n\t"\
        "packssdw %%mm7, %%mm6 \n\t"\
        "paddw %%mm0, %%mm4 \n\t"\
        "paddw %%mm0, %%mm6 \n\t"\
        "movq %%mm4, "U_TEMP"(%0) \n\t"\
        "movq %%mm6, "V_TEMP"(%0) \n\t"

/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
 *
 * Asm string fragment: 32-bit-accumulating vertical filter pass over the
 * list at offset(%0) (offset is a string literal).  The list layout is the
 * APCK_* one: pointers at +0 and +APCK_PTR2, coefficients at +APCK_COEF,
 * step APCK_SIZE, terminated by a null pointer.  Sources are read at
 * (ptr + 2*REG_a) and 8 bytes further.
 *
 * On exit: mm1 and mm7 hold the two filtered word vectors (rounder from
 * VROUNDER_OFFSET(%0) already added), and mm3 / mm4 are reloaded from
 * U_TEMP(%0) / V_TEMP(%0).
 */
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
    "lea "offset"(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "pxor %%mm1, %%mm1 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    ASMALIGN(4)\
    "2: \n\t"\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
    "movq %%mm0, %%mm3 \n\t"\
    "punpcklwd %%mm4, %%mm0 \n\t"\
    "punpckhwd %%mm4, %%mm3 \n\t"\
    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
    "pmaddwd %%mm4, %%mm0 \n\t"\
    "pmaddwd %%mm4, %%mm3 \n\t"\
    "paddd %%mm0, %%mm1 \n\t"\
    "paddd %%mm3, %%mm5 \n\t"\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
    "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t"\
    /* the MMX ops below leave the flags set by 'test' untouched */\
    "movq %%mm2, %%mm0 \n\t"\
    "punpcklwd %%mm3, %%mm2 \n\t"\
    "punpckhwd %%mm3, %%mm0 \n\t"\
    "pmaddwd %%mm4, %%mm2 \n\t"\
    "pmaddwd %%mm4, %%mm0 \n\t"\
    "paddd %%mm2, %%mm7 \n\t"\
    "paddd %%mm0, %%mm6 \n\t"\
    " jnz 2b \n\t"\
    "psrad $16, %%mm1 \n\t"\
    "psrad $16, %%mm5 \n\t"\
    "psrad $16, %%mm7 \n\t"\
    "psrad $16, %%mm6 \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
    "packssdw %%mm5, %%mm1 \n\t"\
    "packssdw %%mm6, %%mm7 \n\t"\
    "paddw %%mm0, %%mm1 \n\t"\
    "paddw %%mm0, %%mm7 \n\t"\
    "movq "U_TEMP"(%0), %%mm3 \n\t"\
    "movq "V_TEMP"(%0), %%mm4 \n\t"

/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * 32-bit-accumulating chroma pass (opens the asm statement) followed by the
 * matching luma pass over LUM_MMX_FILTER_OFFSET.  The asm statement is
 * still open after this macro.
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
    YSCALEYUV2PACKEDX_ACCURATE_UV \
    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)

/*
 * YSCALEYUV2RGBX
 *
 * Asm string fragment: YUV -> RGB arithmetic using the offsets and
 * coefficients stored relative to operand %0 (U_OFFSET, V_OFFSET, Y_OFFSET,
 * UG/VG/UB/VR/Y_COEFF).
 *
 * Input registers:  mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (word vectors).
 * Output registers: mm2 = B, mm4 = G, mm5 = R, each packed to 8 unsigned
 * bytes.  mm0, mm1, mm3, mm6 and mm7 are overwritten as scratch.
 */
#define YSCALEYUV2RGBX \
    "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
    "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t"\
    "packuswb %%mm6, %%mm5 \n\t"\
    "packuswb %%mm3, %%mm4 \n\t"

/*
 * REAL_YSCALEYUV2PACKED(index, c) / YSCALEYUV2PACKED(index, c)
 *
 * Asm string fragment: blends two input lines for packed-YUV output.
 *   index - register used as loop index (stringified; zeroed here)
 *   c     - register holding the context base address (stringified)
 * Operands per the original comments: %0 = buf0, %1 = buf1, %2 = uvbuf0,
 * %3 = uvbuf1.
 *
 * Before the loop the coefficients at CHR_MMX_FILTER_OFFSET+8(c) and
 * LUM_MMX_FILTER_OFFSET+8(c) are shifted right by 3 and WRITTEN BACK to the
 * context.  Inside the loop (label 1) each pair a/b is blended as
 * (b >> 7) + pmulhw(a - b, coeff), giving mm3 = U, mm4 = V, mm1 = Y1,
 * mm7 = Y2.  The loop is not closed here; later fragments branch to 1b.
 *
 * The non-REAL wrapper makes the preprocessor expand its arguments before
 * they are stringified.
 */
#define REAL_YSCALEYUV2PACKED(index, c) \
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
    "psraw $3, %%mm0 \n\t"\
    "psraw $3, %%mm1 \n\t"\
    "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
    "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+VOF]*/\
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+VOF]*/\
    "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+VOF] - uvbuf1[eax+VOF]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+VOF] - uvbuf1[eax+VOF])uvalpha1>>16*/\
    "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
    "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+VOF] >>7*/\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+VOF]uvalpha1 + uvbuf1[eax+VOF](1-uvalpha1)*/\
    "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
    "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
    "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
    "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
    "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
    "psraw $7, %%mm7 \n\t" /* buf1[eax] >>7*/\
    "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/

#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)

/*
 * REAL_YSCALEYUV2RGB_UV(index, c)
 *
 * Asm string fragment: chroma half of the two-line blend for RGB output.
 *   index - register used as loop index (stringified; zeroed here)
 *   c     - register holding the context base address (stringified)
 * Operands per the original comments: %2 = uvbuf0, %3 = uvbuf1.
 *
 * Starts the loop at label 1, blends U and V as
 * (uvbuf1 >> 4) + pmulhw(uvbuf0 - uvbuf1, coeff at CHR_MMX_FILTER_OFFSET+8),
 * subtracts U_OFFSET/V_OFFSET and multiplies by UG_COEFF/VG_COEFF.
 * On exit: mm2 = (U-128)8, mm3 = ug, mm4 = vg, mm5 = (V-128)8.
 */
#define REAL_YSCALEYUV2RGB_UV(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+VOF]*/\
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+VOF]*/\
    "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+VOF] - uvbuf1[eax+VOF]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+VOF] - uvbuf1[eax+VOF])uvalpha1>>16*/\
    "psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4*/\
    "psraw $4, %%mm4 \n\t" /* uvbuf1[eax+VOF] >>4*/\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+VOF]uvalpha1 + uvbuf1[eax+VOF](1-uvalpha1)*/\
    "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */

/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 *
 * Asm string fragment: blends two 8-sample word rows.
 *   index  - loop index register (stringified)
 *   c      - register holding the context base address (stringified)
 *   b1, b2 - base addresses of the two rows (stringified)
 * Computes (b2 >> 4) + pmulhw(b1 - b2, coeff at LUM_MMX_FILTER_OFFSET+8(c)).
 * On exit: mm1 = first 4 results, mm7 = next 4; mm0 and mm6 are scratch.
 */
#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
    "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
    "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
    "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
    "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
    "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "psraw $4, %%mm1 \n\t" /* buf1[eax] >>4*/\
    "psraw $4, %%mm7 \n\t" /* buf1[eax] >>4*/\
    "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/

/*
 * REAL_YSCALEYUV2RGB_COEFF(c)
 *
 * Asm string fragment: final YUV -> RGB step, with coefficients read
 * relative to the context base register 'c' (stringified).
 *
 * Input registers:  mm1 = Y1, mm7 = Y2, mm2 = (U-128)8, mm5 = (V-128)8,
 *                   mm3 = ug, mm4 = vg.
 * Output registers: mm2 = B, mm4 = G, mm5 = R, each packed to 8 unsigned
 * bytes.  mm0, mm1, mm3, mm6 and mm7 are overwritten as scratch.
 */
#define REAL_YSCALEYUV2RGB_COEFF(c) \
    "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t"\
    "packuswb %%mm6, %%mm5 \n\t"\
    "packuswb %%mm3, %%mm4 \n\t"

/*
 * YSCALEYUV2RGB_YA: argument-expanding wrapper around REAL_YSCALEYUV2RGB_YA.
 *
 * YSCALEYUV2RGB(index, c): full two-line blend plus YUV -> RGB conversion.
 * It chains the chroma blend, the luma blend of rows %0 and %1, and the
 * coefficient step.  The loop opened at label 1 is not closed here.
 */
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)

#define YSCALEYUV2RGB(index, c) \
    REAL_YSCALEYUV2RGB_UV(index, c) \
    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
    REAL_YSCALEYUV2RGB_COEFF(c)

/*
 * REAL_YSCALEYUV2PACKED1(index, c) / YSCALEYUV2PACKED1(index, c)
 *
 * Asm string fragment: single-line (no blending) load for packed-YUV
 * output.  'index' is the loop index register (stringified; zeroed here);
 * 'c' is accepted but not used by this fragment.
 *
 * Starts the loop at label 1 and loads, each shifted right by 7:
 *   mm3 = (%2 + index), mm4 = (%2 + index + VOF),
 *   mm1 = (%0 + 2*index), mm7 = 8 bytes further.
 * The loop is not closed here.
 */
#define REAL_YSCALEYUV2PACKED1(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+VOF]*/\
    "psraw $7, %%mm3 \n\t" \
    "psraw $7, %%mm4 \n\t" \
    "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
    "psraw $7, %%mm1 \n\t" \
    "psraw $7, %%mm7 \n\t"

#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)

/*
 * REAL_YSCALEYUV2RGB1(index, c) / YSCALEYUV2RGB1(index, c)
 *
 * Asm string fragment: single-line (no blending) YUV -> RGB conversion.
 *   index - loop index register (stringified; zeroed here)
 *   c     - register holding the context base address (stringified)
 * Chroma is read from (%2 + index) and (%2 + index + VOF), luma from
 * (%0 + 2*index); all are shifted right by 4 before the colour arithmetic.
 *
 * Starts the loop at label 1 and leaves it open.
 * Output registers: mm2 = B, mm4 = G, mm5 = R, each packed to 8 unsigned
 * bytes.
 */
#define REAL_YSCALEYUV2RGB1(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+VOF]*/\
    "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4*/\
    "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+VOF] >>4*/\
    "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
    "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
    "psraw $4, %%mm7 \n\t" /* buf0[eax] >>4*/\
    "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t"\
    "packuswb %%mm6, %%mm5 \n\t"\
    "packuswb %%mm3, %%mm4 \n\t"

#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)

/*
 * REAL_YSCALEYUV2PACKED1b(index, c) / YSCALEYUV2PACKED1b(index, c)
 *
 * Asm string fragment: like YSCALEYUV2PACKED1, but the chroma is the sum of
 * the two chroma rows (%2 and %3) shifted right (logically) by 8.  Luma is
 * taken from %0 only, shifted right by 7.  'index' is the loop index
 * register (stringified; zeroed here); 'c' is accepted but not used.
 *
 * Starts the loop at label 1 and leaves it open.
 * On exit: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2.
 */
#define REAL_YSCALEYUV2PACKED1b(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+VOF]*/\
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+VOF]*/\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+VOF] + uvbuf1[eax+VOF]*/\
    "psrlw $8, %%mm3 \n\t" \
    "psrlw $8, %%mm4 \n\t" \
    "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
    "psraw $7, %%mm1 \n\t" \
    "psraw $7, %%mm7 \n\t"
#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)

/*
 * REAL_YSCALEYUV2RGB1b(index, c) / YSCALEYUV2RGB1b(index, c)
 *
 * Does vertical chrominance interpolation: chroma is the sum of the two
 * chroma rows (%2 and %3) shifted right (logically) by 5; luma comes from
 * %0 only, shifted right by 4.  The result then goes through the same
 * YUV -> RGB arithmetic as YSCALEYUV2RGB1.
 *   index - loop index register (stringified; zeroed here)
 *   c     - register holding the context base address (stringified)
 *
 * Starts the loop at label 1 and leaves it open.
 * Output registers: mm2 = B, mm4 = G, mm5 = R, each packed to 8 unsigned
 * bytes.
 */
#define REAL_YSCALEYUV2RGB1b(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+VOF]*/\
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+VOF]*/\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+VOF] + uvbuf1[eax+VOF]*/\
    "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
    "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
    "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
    "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
    "psraw $4, %%mm7 \n\t" /* buf0[eax] >>4*/\
    "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t"\
    "packuswb %%mm6, %%mm5 \n\t"\
    "packuswb %%mm3, %%mm4 \n\t"

#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)

/*
 * REAL_YSCALEYUV2RGB1_ALPHA(index) / YSCALEYUV2RGB1_ALPHA(index)
 *
 * Asm string fragment: loads 8 words from (%1 + 2*index), shifts each right
 * by 7 and packs them to 8 unsigned bytes in mm7.  mm1 is overwritten as
 * scratch.  'index' is the index register (stringified).
 */
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
    "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
    "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
    "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
    "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
    "packuswb %%mm1, %%mm7 \n\t"
#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)

/*
 * REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) / WRITEBGR32
 *
 * Asm string fragment: interleaves four 8-byte channel registers into eight
 * 4-byte pixels, stores them, and closes the loop opened at label 1.
 *   dst         - destination base register
 *   dstw        - loop bound operand
 *   index       - pixel index register
 *   b, g, r, a  - MMX registers holding the four channels (b and r are
 *                 overwritten)
 *   q0,q2,q3,t  - scratch MMX registers
 * All arguments are stringified into the instruction text.
 *
 * 32 bytes are written at dst + 4*index; index then advances by 8 and the
 * code jumps back to 1b while index is (unsigned) below dstw.
 */
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
    "movq "#b", "#q2" \n\t" /* B */\
    "movq "#r", "#t" \n\t" /* R */\
    "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
    "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
    "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
    "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
    "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
    "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
    "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
    "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
    "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
    "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
\
    MOVNTQ( q0, (dst, index, 4))\
    MOVNTQ( b, 8(dst, index, 4))\
    MOVNTQ( q2, 16(dst, index, 4))\
    MOVNTQ( q3, 24(dst, index, 4))\
\
    "add $8, "#index" \n\t"\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t"
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)

/*
 * REAL_WRITERGB16(dst, dstw, index) / WRITERGB16
 *
 * Asm string fragment: packs 8 pixels from mm2 (B), mm4 (G) and mm5 (R)
 * into 16-bit pixels, stores them, and closes the loop opened at label 1.
 * B and R are masked with bF8 and G with bFC.  B is shifted right by 3,
 * the widened G is shifted left by 3, and the parts are OR-ed together.
 * G is widened by interleaving with mm7, so mm7 is presumably expected to
 * be zero on entry - TODO confirm against the call sites.
 *
 * 16 bytes are written at dst + 2*index; index then advances by 8 and the
 * code jumps back to 1b while index is (unsigned) below dstw.
 * mm1 and mm3 are overwritten as scratch.
 */
#define REAL_WRITERGB16(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
    "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
    "psrlq $3, %%mm2 \n\t"\
\
    "movq %%mm2, %%mm1 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
\
    "punpcklbw %%mm7, %%mm3 \n\t"\
    "punpcklbw %%mm5, %%mm2 \n\t"\
    "punpckhbw %%mm7, %%mm4 \n\t"\
    "punpckhbw %%mm5, %%mm1 \n\t"\
\
    "psllq $3, %%mm3 \n\t"\
    "psllq $3, %%mm4 \n\t"\
\
    "por %%mm3, %%mm2 \n\t"\
    "por %%mm4, %%mm1 \n\t"\
\
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add $8, "#index" \n\t"\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t"
#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)

/*
 * REAL_WRITERGB15(dst, dstw, index) / WRITERGB15
 *
 * Asm string fragment: same structure as WRITERGB16, but for 15-bit pixels.
 * All three channels are masked with bF8, R is additionally shifted right
 * by 1, and the widened G is shifted left by 2 instead of 3.
 * G is widened by interleaving with mm7, so mm7 is presumably expected to
 * be zero on entry - TODO confirm against the call sites.
 *
 * 16 bytes are written at dst + 2*index; index then advances by 8 and the
 * code jumps back to 1b while index is (unsigned) below dstw.
 * mm1 and mm3 are overwritten as scratch.
 */
#define REAL_WRITERGB15(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
    "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
    "psrlq $3, %%mm2 \n\t"\
    "psrlq $1, %%mm5 \n\t"\
\
    "movq %%mm2, %%mm1 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
\
    "punpcklbw %%mm7, %%mm3 \n\t"\
    "punpcklbw %%mm5, %%mm2 \n\t"\
    "punpckhbw %%mm7, %%mm4 \n\t"\
    "punpckhbw %%mm5, %%mm1 \n\t"\
\
    "psllq $2, %%mm3 \n\t"\
    "psllq $2, %%mm4 \n\t"\
\
    "por %%mm3, %%mm2 \n\t"\
    "por %%mm4, %%mm1 \n\t"\
\
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add $8, "#index" \n\t"\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t"
#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)

/*
 * WRITEBGR24OLD(dst, dstw, index)
 *
 * Asm string fragment: packs 8 pixels from mm2 (B), mm4 (G), mm5 (R), with
 * mm7 = 0, into 24 bytes of 3-byte pixels and closes the loop opened at
 * label 1.  The channels are first expanded to four quadwords of 0RGB0RGB;
 * the padding bytes are then squeezed out with shifts and the bm00000111 /
 * bm11111000 / bm00001111 masks.
 *
 * Three quadwords are stored at dst, dst+8 and dst+16.  Unlike the 16/32-bit
 * writers, dst itself is advanced by 24 and is not indexed.  index advances
 * by 8 and the code jumps back to 1b while index is (unsigned) below dstw.
 * mm0..mm6 are all overwritten.
 */
#define WRITEBGR24OLD(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq %%mm2, %%mm1 \n\t" /* B */\
    "movq %%mm5, %%mm6 \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
    "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
    "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
    "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
    "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
    "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 0 */\
    "pand "MANGLE(bm11111000)", %%mm0 \n\t" /* 00RGB000 0.5 */\
    "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
    "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
    "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
    "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
\
    "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
    "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
    "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
    "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
    "pand "MANGLE(bm00001111)", %%mm2 \n\t" /* 0000RGBR 1 */\
    "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
    "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
    "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 2 */\
    "pand "MANGLE(bm11111000)", %%mm1 \n\t" /* 00RGB000 2.5 */\
    "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
    "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
    "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
    "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
\
    "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
    "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
    "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
    "pand "MANGLE(bm00000111)", %%mm5 \n\t" /* 00000RGB 3 */\
    "pand "MANGLE(bm11111000)", %%mm3 \n\t" /* 00RGB000 3.5 */\
    "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
    "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
    "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
\
    MOVNTQ(%%mm0, (dst))\
    MOVNTQ(%%mm2, 8(dst))\
    MOVNTQ(%%mm3, 16(dst))\
    "add $24, "#dst" \n\t"\
\
    "add $8, "#index" \n\t"\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t"

/*
 * WRITEBGR24MMX(dst, dstw, index): plain-MMX inline-asm fragment that turns
 * 8 pixels of planar bytes (mm2 = B, mm4 = G, mm5 = R, mm7 = 0 on entry, as
 * stated by the comment below) into 24 bytes of packed 3-byte pixels using
 * unpacks, shifts and ORs only.
 * The three resulting quadwords are stored with MOVNTQ at (dst), 8(dst) and
 * 16(dst); afterwards dst is advanced by 24 and index by 8, and "jb 1b"
 * loops back while index is (unsigned) below dstw.
 * The local label "1:" is not defined in this macro; it has to be provided
 * by the asm text this fragment is appended to.
 * All of mm0-mm7 are overwritten.
 */
6542b44e 777#define WRITEBGR24MMX(dst, dstw, index) \
2da0d70d
DB
778 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
779 "movq %%mm2, %%mm1 \n\t" /* B */\
780 "movq %%mm5, %%mm6 \n\t" /* R */\
781 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
782 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
783 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
784 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
785 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
786 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
787 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
788 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
789 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
790 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
99d2cb72 791\
2da0d70d
DB
792 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
793 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
794 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
795 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
99d2cb72 796\
2da0d70d
DB
797 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
798 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
799 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
800 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
99d2cb72 801\
2da0d70d
DB
802 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
803 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
804 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
805 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
99d2cb72 806\
2da0d70d
DB
807 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
808 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
809 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
810 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
811 MOVNTQ(%%mm0, (dst))\
99d2cb72 812\
2da0d70d
DB
813 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
814 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
815 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
816 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
817 MOVNTQ(%%mm6, 8(dst))\
99d2cb72 818\
2da0d70d
DB
819 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
820 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
821 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
822 MOVNTQ(%%mm5, 16(dst))\
99d2cb72 823\
2da0d70d 824 "add $24, "#dst" \n\t"\
99d2cb72 825\
2da0d70d
DB
826 "add $8, "#index" \n\t"\
827 "cmp "#dstw", "#index" \n\t"\
828 " jb 1b \n\t"
99d2cb72 829
/*
 * WRITEBGR24MMX2(dst, dstw, index): same job as WRITEBGR24MMX (8 pixels from
 * mm2 = B, mm4 = G, mm5 = R into 24 packed bytes) but built on pshufw plus
 * the byte masks ff_M24A / ff_M24B / ff_M24C instead of shift/unpack chains.
 * mm0 and mm7 are loaded with ff_M24A and ff_M24C first, so whatever the
 * caller left in mm7 is discarded. Each output quadword is assembled in mm6
 * and stored with MOVNTQ at (dst), 8(dst), 16(dst); then dst += 24,
 * index += 8 and "jb 1b" loops while index is (unsigned) below dstw.
 * The local label "1:" must be provided by the surrounding asm text.
 * Overwrites mm0, mm1, mm3, mm4, mm6 and mm7.
 */
6542b44e 830#define WRITEBGR24MMX2(dst, dstw, index) \
2da0d70d 831 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
5802683a
RD
832 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
833 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
2da0d70d
DB
834 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
835 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
836 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
99d2cb72 837\
2da0d70d
DB
838 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
839 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
840 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
99d2cb72 841\
2da0d70d
DB
842 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
843 "por %%mm1, %%mm6 \n\t"\
844 "por %%mm3, %%mm6 \n\t"\
845 MOVNTQ(%%mm6, (dst))\
99d2cb72 846\
2da0d70d
DB
847 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
848 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
849 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
850 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
99d2cb72 851\
5802683a 852 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
2da0d70d
DB
853 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
854 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
99d2cb72 855\
2da0d70d
DB
856 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
857 "por %%mm3, %%mm6 \n\t"\
858 MOVNTQ(%%mm6, 8(dst))\
99d2cb72 859\
2da0d70d
DB
860 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\
861 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
862 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
99d2cb72 863\
2da0d70d
DB
864 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
865 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
5802683a 866 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
99d2cb72 867\
2da0d70d
DB
868 "por %%mm1, %%mm3 \n\t"\
869 "por %%mm3, %%mm6 \n\t"\
870 MOVNTQ(%%mm6, 16(dst))\
99d2cb72 871\
2da0d70d 872 "add $24, "#dst" \n\t"\
99d2cb72 873\
2da0d70d
DB
874 "add $8, "#index" \n\t"\
875 "cmp "#dstw", "#index" \n\t"\
876 " jb 1b \n\t"
99d2cb72 877
/*
 * Pick the BGR24 writer for this template instantiation: the pshufw-based
 * WRITEBGR24MMX2 when COMPILE_TEMPLATE_MMX2 is set, the plain-MMX
 * WRITEBGR24MMX otherwise. The #undef first drops any earlier definition of
 * WRITEBGR24 so the macro can be redefined.
 */
94daf2e9 878#if COMPILE_TEMPLATE_MMX2
7630f2e0 879#undef WRITEBGR24
6e1c66bc 880#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
99d2cb72 881#else
7630f2e0 882#undef WRITEBGR24
6e1c66bc 883#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
99d2cb72
MN
884#endif
885
/*
 * REAL_WRITEYUY2(dst, dstw, index): inline-asm fragment that packs the words
 * in mm3, mm4, mm1 and mm7 to unsigned bytes, interleaves mm3 with mm4, and
 * then interleaves the packed mm1/mm7 bytes with that result. The two
 * resulting quadwords are stored with MOVNTQ at (dst, index, 2) and
 * 8(dst, index, 2), i.e. 16 output bytes per iteration.
 * Presumably mm1/mm7 carry luma and mm3/mm4 carry U/V (which would give
 * Y U Y V byte order) -- the producers of these registers are not visible
 * here, confirm against the YSCALEYUV2PACKED* macros.
 * index is advanced by 8 and "jb 1b" loops while index is (unsigned) below
 * dstw; the local label "1:" must be provided by the surrounding asm text.
 *
 * WRITEYUY2 is a forwarding wrapper so that macro arguments are expanded
 * before REAL_WRITEYUY2 stringifies them with #.
 */
6e1c66bc 886#define REAL_WRITEYUY2(dst, dstw, index) \
2da0d70d
DB
887 "packuswb %%mm3, %%mm3 \n\t"\
888 "packuswb %%mm4, %%mm4 \n\t"\
889 "packuswb %%mm7, %%mm1 \n\t"\
890 "punpcklbw %%mm4, %%mm3 \n\t"\
891 "movq %%mm1, %%mm7 \n\t"\
892 "punpcklbw %%mm3, %%mm1 \n\t"\
893 "punpckhbw %%mm3, %%mm7 \n\t"\
25593e29 894\
2da0d70d
DB
895 MOVNTQ(%%mm1, (dst, index, 2))\
896 MOVNTQ(%%mm7, 8(dst, index, 2))\
25593e29 897\
2da0d70d
DB
898 "add $8, "#index" \n\t"\
899 "cmp "#dstw", "#index" \n\t"\
900 " jb 1b \n\t"
6e1c66bc 901#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
25593e29
MN
902
903
7ac40327
RP
/**
 * Vertical filter stage writing planar output (dest = luma, uDest/vDest =
 * chroma, aDest = alpha).
 *
 * With COMPILE_TEMPLATE_MMX and SWS_BITEXACT not set, each plane is produced
 * by the YSCALEYUV2YV12X or (with SWS_ACCURATE_RND) YSCALEYUV2YV12X_ACCURATE
 * macro and the function returns. Chroma is written only when uDest is
 * non-NULL, alpha only when CONFIG_SWSCALE_ALPHA is enabled and aDest is
 * non-NULL; luma is always written.
 * Otherwise the work is forwarded to yuv2yuvX_altivec_real() (AltiVec
 * builds) or yuv2yuvXinC().
 *
 * NOTE(review): the AltiVec call receives neither alpSrc nor aDest, so it
 * looks like the alpha plane is not written on that path -- confirm against
 * yuv2yuvX_altivec_real().
 */
904static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
905 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
6858492e 906 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
38858470 907{
94daf2e9 908#if COMPILE_TEMPLATE_MMX
dd68318c
RP
909 if(!(c->flags & SWS_BITEXACT)) {
910 if (c->flags & SWS_ACCURATE_RND) {
911 if (uDest) {
14014d47
MN
/* U uses source offset "0", V uses the stringified VOF offset; both share the chroma filter */
912 YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
913 YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
914 }
dd68318c 915 if (CONFIG_SWSCALE_ALPHA && aDest) {
6858492e
CS
916 YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
917 }
bca11e75 918
14014d47 919 YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
dd68318c
RP
920 } else {
921 if (uDest) {
14014d47
MN
922 YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
923 YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
924 }
dd68318c 925 if (CONFIG_SWSCALE_ALPHA && aDest) {
6858492e
CS
926 YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
927 }
2da0d70d 928
14014d47
MN
929 YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
930 }
f433c8ab
MN
931 return;
932 }
933#endif
94daf2e9 934#if COMPILE_TEMPLATE_ALTIVEC
9b734d44
RP
935 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
936 chrFilter, chrSrc, chrFilterSize,
937 dest, uDest, vDest, dstW, chrDstW);
94daf2e9 938#else //COMPILE_TEMPLATE_ALTIVEC
9b734d44
RP
939 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
940 chrFilter, chrSrc, chrFilterSize,
941 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
94daf2e9 942#endif //!COMPILE_TEMPLATE_ALTIVEC
c1b0bfb4 943}
2add307d 944
7ac40327
RP
/**
 * Template-named wrapper around yuv2nv12XinC(): every argument except the
 * context c is forwarded unchanged. No SIMD variant is selected here, and
 * c is not used.
 */
945static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
946 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
2da0d70d 947 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
6118e52e 948{
9b734d44
RP
949 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
950 chrFilter, chrSrc, chrFilterSize,
951 dest, uDest, dstW, chrDstW, dstFormat);
6118e52e
VS
952}
953
7ac40327 954static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
6858492e 955 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
c1b0bfb4 956{
f433c8ab 957 int i;
94daf2e9 958#if COMPILE_TEMPLATE_MMX
dd68318c 959 if(!(c->flags & SWS_BITEXACT)) {
6858492e
CS
960 long p= 4;
961 uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
962 uint8_t *dst[4]= {aDest, dest, uDest, vDest};
963 x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
2da0d70d 964
dd68318c
RP
965 if (c->flags & SWS_ACCURATE_RND) {
966 while(p--) {
967 if (dst[p]) {
3164d25e
CS
968 __asm__ volatile(
969 YSCALEYUV2YV121_ACCURATE
970 :: "r" (src[p]), "r" (dst[p] + counter[p]),
971 "g" (-counter[p])
972 : "%"REG_a
973 );
974 }
6858492e 975 }
dd68318c
RP
976 } else {
977 while(p--) {
978 if (dst[p]) {
3164d25e
CS
979 __asm__ volatile(
980 YSCALEYUV2YV121
981 :: "r" (src[p]), "r" (dst[p] + counter[p]),
982 "g" (-counter[p])
983 : "%"REG_a
984 );
985 }
6858492e 986 }
d78c1ea1 987 }
f433c8ab
MN
988 return;
989 }
990#endif
dd68318c 991 for (i=0; i<dstW; i++) {
a1f3ffa3 992 int val= (lumSrc[i]+64)>>7;
2da0d70d 993
dd68318c 994 if (val&256) {
2da0d70d
DB
995 if (val<0) val=0;
996 else val=255;
997 }
998
999 dest[i]= val;
1000 }
1001
1b0a4572 1002 if (uDest)
dd68318c 1003 for (i=0; i<chrDstW; i++) {
a1f3ffa3
MN
1004 int u=(chrSrc[i ]+64)>>7;
1005 int v=(chrSrc[i + VOFW]+64)>>7;
2da0d70d 1006
dd68318c 1007 if ((u|v)&256) {
2da0d70d
DB
1008 if (u<0) u=0;
1009 else if (u>255) u=255;
1010 if (v<0) v=0;
1011 else if (v>255) v=255;
1012 }
1013
1014 uDest[i]= u;
1015 vDest[i]= v;
1016 }
6858492e
CS
1017
1018 if (CONFIG_SWSCALE_ALPHA && aDest)
dd68318c 1019 for (i=0; i<dstW; i++) {
6858492e
CS
1020 int val= (alpSrc[i]+64)>>7;
1021 aDest[i]= av_clip_uint8(val);
1022 }
38858470
MN
1023}
1024
c1b0bfb4 1025
d604bab9
MN
1026/**
1027 * vertical scale YV12 to RGB
 *
 * With COMPILE_TEMPLATE_MMX and SWS_BITEXACT not set, the destination
 * formats RGB32, BGR24, RGB555, RGB565 and YUYV422 are written by inline asm
 * assembled from the YSCALEYUV2PACKEDX* / YSCALEYUV2RGBX / WRITE* macros;
 * SWS_ACCURATE_RND selects the *_ACCURATE variants. Each handled case
 * returns directly. Any other format falls out of the switch and reaches
 * ff_yuv2packedX_altivec() (AltiVec builds, for the formats listed in its
 * condition and only without alpha) or yuv2packedXinC().
 *
 * In the BGR24 cases the operand list is spelled out: %0 = &c->redDither,
 * %1-%3 = dummy, %4 = dest, %5 = dstW. The other cases close the asm with
 * YSCALEYUV2PACKEDX_END, which is defined outside this view.
1028 */
7ac40327
RP
1029static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
1030 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
1031 const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
c1b0bfb4 1032{
94daf2e9 1033#if COMPILE_TEMPLATE_MMX
d0ce212a 1034 x86_reg dummy=0;
dd68318c
RP
1035 if(!(c->flags & SWS_BITEXACT)) {
1036 if (c->flags & SWS_ACCURATE_RND) {
1037 switch(c->dstFormat) {
14014d47 1038 case PIX_FMT_RGB32:
dd68318c 1039 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
6858492e
CS
1040 YSCALEYUV2PACKEDX_ACCURATE
1041 YSCALEYUV2RGBX
/* park mm2/mm4/mm5 in the context temporaries while the alpha filter runs; only mm5 is reloaded below */
1042 "movq %%mm2, "U_TEMP"(%0) \n\t"
1043 "movq %%mm4, "V_TEMP"(%0) \n\t"
1044 "movq %%mm5, "Y_TEMP"(%0) \n\t"
1045 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
1046 "movq "Y_TEMP"(%0), %%mm5 \n\t"
1047 "psraw $3, %%mm1 \n\t"
1048 "psraw $3, %%mm7 \n\t"
1049 "packuswb %%mm7, %%mm1 \n\t"
1050 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
1051
1052 YSCALEYUV2PACKEDX_END
dd68318c 1053 } else {
3164d25e
CS
1054 YSCALEYUV2PACKEDX_ACCURATE
1055 YSCALEYUV2RGBX
/* no alpha plane: set every bit of mm7, which is passed to WRITEBGR32 in the slot the alpha branch fills with mm1 */
1056 "pcmpeqd %%mm7, %%mm7 \n\t"
1057 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
2da0d70d 1058
3164d25e 1059 YSCALEYUV2PACKEDX_END
6858492e 1060 }
14014d47
MN
1061 return;
1062 case PIX_FMT_BGR24:
1063 YSCALEYUV2PACKEDX_ACCURATE
1064 YSCALEYUV2RGBX
40494418 1065 "pxor %%mm7, %%mm7 \n\t"
14014d47
MN
/* REG_c = dest + 3*REG_a: byte address of the current 3-byte pixel */
1066 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1067 "add %4, %%"REG_c" \n\t"
1068 WRITEBGR24(%%REGc, %5, %%REGa)
2da0d70d
DB
1069
1070
14014d47
MN
1071 :: "r" (&c->redDither),
1072 "m" (dummy), "m" (dummy), "m" (dummy),
1073 "r" (dest), "m" (dstW)
1074 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1075 );
1076 return;
1077 case PIX_FMT_RGB555:
1078 YSCALEYUV2PACKEDX_ACCURATE
1079 YSCALEYUV2RGBX
40494418 1080 "pxor %%mm7, %%mm7 \n\t"
14014d47 1081 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bca11e75 1082#ifdef DITHER1XBPP
88e2a9ae
CEH
1083 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1084 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1085 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
2da0d70d
DB
1086#endif
1087
14014d47
MN
1088 WRITERGB15(%4, %5, %%REGa)
1089 YSCALEYUV2PACKEDX_END
1090 return;
1091 case PIX_FMT_RGB565:
1092 YSCALEYUV2PACKEDX_ACCURATE
1093 YSCALEYUV2RGBX
40494418 1094 "pxor %%mm7, %%mm7 \n\t"
14014d47 1095 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bca11e75 1096#ifdef DITHER1XBPP
88e2a9ae
CEH
1097 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1098 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1099 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
2da0d70d
DB
1100#endif
1101
14014d47
MN
1102 WRITERGB16(%4, %5, %%REGa)
1103 YSCALEYUV2PACKEDX_END
1104 return;
1105 case PIX_FMT_YUYV422:
1106 YSCALEYUV2PACKEDX_ACCURATE
1107 /* no RGB conversion here: mm3, mm4, mm1 and mm7 are the registers WRITEYUY2 consumes */
1108
1109 "psraw $3, %%mm3 \n\t"
1110 "psraw $3, %%mm4 \n\t"
1111 "psraw $3, %%mm1 \n\t"
1112 "psraw $3, %%mm7 \n\t"
1113 WRITEYUY2(%4, %5, %%REGa)
1114 YSCALEYUV2PACKEDX_END
1115 return;
1116 }
dd68318c
RP
1117 } else {
1118 switch(c->dstFormat) {
14014d47 1119 case PIX_FMT_RGB32:
dd68318c 1120 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
6858492e
CS
1121 YSCALEYUV2PACKEDX
1122 YSCALEYUV2RGBX
1123 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
1124 "psraw $3, %%mm1 \n\t"
1125 "psraw $3, %%mm7 \n\t"
1126 "packuswb %%mm7, %%mm1 \n\t"
1127 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1128 YSCALEYUV2PACKEDX_END
dd68318c 1129 } else {
3164d25e
CS
1130 YSCALEYUV2PACKEDX
1131 YSCALEYUV2RGBX
1132 "pcmpeqd %%mm7, %%mm7 \n\t"
1133 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1134 YSCALEYUV2PACKEDX_END
6858492e 1135 }
14014d47
MN
1136 return;
1137 case PIX_FMT_BGR24:
1138 YSCALEYUV2PACKEDX
1139 YSCALEYUV2RGBX
40494418 1140 "pxor %%mm7, %%mm7 \n\t"
14014d47
MN
1141 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
1142 "add %4, %%"REG_c" \n\t"
1143 WRITEBGR24(%%REGc, %5, %%REGa)
1144
1145 :: "r" (&c->redDither),
1146 "m" (dummy), "m" (dummy), "m" (dummy),
1147 "r" (dest), "m" (dstW)
1148 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1149 );
1150 return;
1151 case PIX_FMT_RGB555:
1152 YSCALEYUV2PACKEDX
1153 YSCALEYUV2RGBX
40494418 1154 "pxor %%mm7, %%mm7 \n\t"
14014d47 1155 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
c1b0bfb4 1156#ifdef DITHER1XBPP
88e2a9ae
CEH
1157 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
1158 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
1159 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
2da0d70d
DB
1160#endif
1161
14014d47
MN
1162 WRITERGB15(%4, %5, %%REGa)
1163 YSCALEYUV2PACKEDX_END
1164 return;
1165 case PIX_FMT_RGB565:
1166 YSCALEYUV2PACKEDX
1167 YSCALEYUV2RGBX
40494418 1168 "pxor %%mm7, %%mm7 \n\t"
14014d47 1169 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
c1b0bfb4 1170#ifdef DITHER1XBPP
88e2a9ae
CEH
1171 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
1172 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
1173 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
2da0d70d
DB
1174#endif
1175
14014d47
MN
1176 WRITERGB16(%4, %5, %%REGa)
1177 YSCALEYUV2PACKEDX_END
1178 return;
1179 case PIX_FMT_YUYV422:
1180 YSCALEYUV2PACKEDX
1181 /* no RGB conversion here: mm3, mm4, mm1 and mm7 are the registers WRITEYUY2 consumes */
1182
1183 "psraw $3, %%mm3 \n\t"
1184 "psraw $3, %%mm4 \n\t"
1185 "psraw $3, %%mm1 \n\t"
1186 "psraw $3, %%mm7 \n\t"
1187 WRITEYUY2(%4, %5, %%REGa)
1188 YSCALEYUV2PACKEDX_END
1189 return;
1190 }
bca11e75
MN
1191 }
1192 }
94daf2e9
RP
1193#endif /* COMPILE_TEMPLATE_MMX */
1194#if COMPILE_TEMPLATE_ALTIVEC
2da0d70d 1195 /* The following list of supported dstFormat values should
780daf2b 1196 match what's found in the body of ff_yuv2packedX_altivec() */
d55ef636 1197 if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
9b734d44
RP
1198 (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
1199 c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1200 c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
780daf2b
DB
1201 ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
1202 chrFilter, chrSrc, chrFilterSize,
1203 dest, dstW, dstY);
2da0d70d
DB
1204 else
1205#endif
1206 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1207 chrFilter, chrSrc, chrFilterSize,
6858492e 1208 alpSrc, dest, dstW, dstY);
c1b0bfb4
MN
1209}
1210
c1b0bfb4
MN
1211/**
1212 * vertical bilinear scale YV12 to RGB
1213 */
7ac40327
RP
1214static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
1215 const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
d604bab9 1216{
ac0ad729
MN
1217 int yalpha1=4095- yalpha;
1218 int uvalpha1=4095-uvalpha;
2da0d70d 1219 int i;
d604bab9 1220
94daf2e9 1221#if COMPILE_TEMPLATE_MMX
dd68318c
RP
1222 if(!(c->flags & SWS_BITEXACT)) {
1223 switch(c->dstFormat) {
c255994b
RP
1224 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1225 case PIX_FMT_RGB32:
1226 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
6858492e 1227#if ARCH_X86_64
c255994b 1228 __asm__ volatile(
6858492e
CS
1229 YSCALEYUV2RGB(%%REGBP, %5)
1230 YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
1231 "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1232 "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1233 "packuswb %%mm7, %%mm1 \n\t"
04ef1d3f 1234 WRITEBGR32(%4, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
6858492e 1235
04ef1d3f 1236 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
6858492e
CS
1237 "a" (&c->redDither)
1238 ,"r" (abuf0), "r" (abuf1)
04ef1d3f 1239 : "%"REG_BP
c255994b 1240 );
6858492e 1241#else
c255994b
RP
1242 *(uint16_t **)(&c->u_temp)=abuf0;
1243 *(uint16_t **)(&c->v_temp)=abuf1;
1244 __asm__ volatile(
6858492e
CS
1245 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1246 "mov %4, %%"REG_b" \n\t"
1247 "push %%"REG_BP" \n\t"
1248 YSCALEYUV2RGB(%%REGBP, %5)
1249 "push %0 \n\t"
1250 "push %1 \n\t"
1251 "mov "U_TEMP"(%5), %0 \n\t"
1252 "mov "V_TEMP"(%5), %1 \n\t"
1253 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
1254 "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1255 "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1256 "packuswb %%mm7, %%mm1 \n\t"
1257 "pop %1 \n\t"
1258 "pop %0 \n\t"
1259 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1260 "pop %%"REG_BP" \n\t"
1261 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1262
1263 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1264 "a" (&c->redDither)
c255994b 1265 );
6858492e 1266#endif
c255994b
RP
1267 } else {
1268 __asm__ volatile(
3164d25e
CS
1269 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1270 "mov %4, %%"REG_b" \n\t"
1271 "push %%"REG_BP" \n\t"
1272 YSCALEYUV2RGB(%%REGBP, %5)
1273 "pcmpeqd %%mm7, %%mm7 \n\t"
1274 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1275 "pop %%"REG_BP" \n\t"
1276 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1277
3164d25e
CS
1278 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1279 "a" (&c->redDither)
c255994b
RP
1280 );
1281 }
1282 return;
1283 case PIX_FMT_BGR24:
1284 __asm__ volatile(
2da0d70d
DB
1285 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1286 "mov %4, %%"REG_b" \n\t"
1287 "push %%"REG_BP" \n\t"
1288 YSCALEYUV2RGB(%%REGBP, %5)
40494418 1289 "pxor %%mm7, %%mm7 \n\t"
2da0d70d
DB
1290 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1291 "pop %%"REG_BP" \n\t"
1292 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1293 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1294 "a" (&c->redDither)
c255994b
RP
1295 );
1296 return;
1297 case PIX_FMT_RGB555:
1298 __asm__ volatile(
2da0d70d
DB
1299 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1300 "mov %4, %%"REG_b" \n\t"
1301 "push %%"REG_BP" \n\t"
1302 YSCALEYUV2RGB(%%REGBP, %5)
40494418 1303 "pxor %%mm7, %%mm7 \n\t"
2da0d70d 1304 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1305#ifdef DITHER1XBPP
88e2a9ae
CEH
1306 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1307 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1308 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1309#endif
1310
27a90b04 1311 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1312 "pop %%"REG_BP" \n\t"
1313 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1314
1315 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1316 "a" (&c->redDither)
c255994b
RP
1317 );
1318 return;
1319 case PIX_FMT_RGB565:
1320 __asm__ volatile(
2da0d70d
DB
1321 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1322 "mov %4, %%"REG_b" \n\t"
1323 "push %%"REG_BP" \n\t"
1324 YSCALEYUV2RGB(%%REGBP, %5)
40494418 1325 "pxor %%mm7, %%mm7 \n\t"
2da0d70d 1326 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1327#ifdef DITHER1XBPP
88e2a9ae
CEH
1328 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1329 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1330 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1331#endif
1332
27a90b04 1333 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1334 "pop %%"REG_BP" \n\t"
1335 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1336 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1337 "a" (&c->redDither)
c255994b
RP
1338 );
1339 return;
1340 case PIX_FMT_YUYV422:
1341 __asm__ volatile(
2da0d70d
DB
1342 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1343 "mov %4, %%"REG_b" \n\t"
1344 "push %%"REG_BP" \n\t"
1345 YSCALEYUV2PACKED(%%REGBP, %5)
1346 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1347 "pop %%"REG_BP" \n\t"
1348 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1349 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1350 "a" (&c->redDither)
c255994b
RP
1351 );
1352 return;
1353 default: break;
2da0d70d 1354 }
f433c8ab 1355 }
94daf2e9 1356#endif //COMPILE_TEMPLATE_MMX
9b734d44 1357 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
d604bab9
MN
1358}
1359
1360/**
1361 * YV12 to RGB without scaling or interpolating
 *
 * When SWS_FULL_CHR_H_INT is set in flags, the call is forwarded to
 * c->yuv2packed2 with buf0/abuf0 used for both lines and a yalpha of 0.
 * Otherwise, with COMPILE_TEMPLATE_MMX and SWS_BITEXACT not set, RGB32,
 * BGR24, RGB555, RGB565 and YUYV422 are written by inline asm: the
 * YSCALEYUV2RGB1 / YSCALEYUV2PACKED1 macros when uvalpha < 2048, the
 * "1b" variants otherwise. Formats not handled there fall through to the C
 * macros at the end, which presumably consume the locals buf1, yalpha,
 * yalpha1 and i (the macros are defined outside this view).
 *
 * Common asm operand layout: %0 = buf0, %1 = buf1 (abuf0 in the RGB32 alpha
 * variants), %2 = uvbuf0, %3 = uvbuf1, %4 = dest, %5 = &c->redDither.
 * REG_b is stashed at ESP_OFFSET(%5) and REG_BP pushed around each loop,
 * and both are restored before the asm statement ends.
1362 */
7ac40327
RP
1363static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
1364 const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
d604bab9 1365{
2da0d70d
DB
1366 const int yalpha1=0;
1367 int i;
6a4970ab 1368
7ac40327 1369 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
2da0d70d 1370 const int yalpha= 4096; //FIXME ...
96034638 1371
dd68318c 1372 if (flags&SWS_FULL_CHR_H_INT) {
40fa5140 1373 c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
2da0d70d
DB
1374 return;
1375 }
397c035e 1376
94daf2e9 1377#if COMPILE_TEMPLATE_MMX
dd68318c
RP
1378 if(!(flags & SWS_BITEXACT)) {
1379 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1380 switch(dstFormat) {
14014d47 1381 case PIX_FMT_RGB32:
dd68318c 1382 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
6858492e 1383 __asm__ volatile(
c255994b
RP
1384 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1385 "mov %4, %%"REG_b" \n\t"
1386 "push %%"REG_BP" \n\t"
1387 YSCALEYUV2RGB1(%%REGBP, %5)
1388 YSCALEYUV2RGB1_ALPHA(%%REGBP)
1389 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1390 "pop %%"REG_BP" \n\t"
1391 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1392
/* operand %1 is abuf0 here rather than buf1 */
1393 :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1394 "a" (&c->redDither)
6858492e 1395 );
dd68318c 1396 } else {
3164d25e 1397 __asm__ volatile(
c255994b
RP
1398 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1399 "mov %4, %%"REG_b" \n\t"
1400 "push %%"REG_BP" \n\t"
1401 YSCALEYUV2RGB1(%%REGBP, %5)
1402 "pcmpeqd %%mm7, %%mm7 \n\t"
1403 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1404 "pop %%"REG_BP" \n\t"
1405 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1406
1407 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1408 "a" (&c->redDither)
1409 );
1410 }
1411 return;
1412 case PIX_FMT_BGR24:
1413 __asm__ volatile(
3164d25e
CS
1414 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1415 "mov %4, %%"REG_b" \n\t"
1416 "push %%"REG_BP" \n\t"
1417 YSCALEYUV2RGB1(%%REGBP, %5)
c255994b
RP
1418 "pxor %%mm7, %%mm7 \n\t"
1419 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
3164d25e
CS
1420 "pop %%"REG_BP" \n\t"
1421 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
14014d47 1422
3164d25e
CS
1423 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1424 "a" (&c->redDither)
14014d47
MN
1425 );
1426 return;
1427 case PIX_FMT_RGB555:
7ad6469e 1428 __asm__ volatile(
c255994b
RP
1429 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1430 "mov %4, %%"REG_b" \n\t"
1431 "push %%"REG_BP" \n\t"
1432 YSCALEYUV2RGB1(%%REGBP, %5)
1433 "pxor %%mm7, %%mm7 \n\t"
1434 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1435#ifdef DITHER1XBPP
c255994b
RP
1436 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1437 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1438 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d 1439#endif
c255994b
RP
1440 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1441 "pop %%"REG_BP" \n\t"
1442 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1443
c255994b
RP
1444 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1445 "a" (&c->redDither)
14014d47
MN
1446 );
1447 return;
1448 case PIX_FMT_RGB565:
7ad6469e 1449 __asm__ volatile(
c255994b
RP
1450 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1451 "mov %4, %%"REG_b" \n\t"
1452 "push %%"REG_BP" \n\t"
1453 YSCALEYUV2RGB1(%%REGBP, %5)
1454 "pxor %%mm7, %%mm7 \n\t"
1455 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1456#ifdef DITHER1XBPP
c255994b
RP
1457 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1458 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1459 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1460#endif
1461
c255994b
RP
1462 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1463 "pop %%"REG_BP" \n\t"
1464 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1465
c255994b
RP
1466 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1467 "a" (&c->redDither)
14014d47
MN
1468 );
1469 return;
1470 case PIX_FMT_YUYV422:
7ad6469e 1471 __asm__ volatile(
c255994b
RP
1472 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1473 "mov %4, %%"REG_b" \n\t"
1474 "push %%"REG_BP" \n\t"
1475 YSCALEYUV2PACKED1(%%REGBP, %5)
1476 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1477 "pop %%"REG_BP" \n\t"
1478 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
14014d47 1479
c255994b
RP
1480 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1481 "a" (&c->redDither)
14014d47
MN
1482 );
1483 return;
1484 }
dd68318c
RP
1485 } else {
/* uvalpha >= 2048: same structure as above, using the "1b" scaler macros */
1486 switch(dstFormat) {
14014d47 1487 case PIX_FMT_RGB32:
dd68318c 1488 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
6858492e 1489 __asm__ volatile(
c255994b
RP
1490 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1491 "mov %4, %%"REG_b" \n\t"
1492 "push %%"REG_BP" \n\t"
1493 YSCALEYUV2RGB1b(%%REGBP, %5)
1494 YSCALEYUV2RGB1_ALPHA(%%REGBP)
1495 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1496 "pop %%"REG_BP" \n\t"
1497 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1498
1499 :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1500 "a" (&c->redDither)
6858492e 1501 );
dd68318c 1502 } else {
3164d25e 1503 __asm__ volatile(
c255994b
RP
1504 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1505 "mov %4, %%"REG_b" \n\t"
1506 "push %%"REG_BP" \n\t"
1507 YSCALEYUV2RGB1b(%%REGBP, %5)
1508 "pcmpeqd %%mm7, %%mm7 \n\t"
1509 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1510 "pop %%"REG_BP" \n\t"
1511 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1512
1513 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1514 "a" (&c->redDither)
1515 );
1516 }
1517 return;
1518 case PIX_FMT_BGR24:
1519 __asm__ volatile(
3164d25e
CS
1520 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1521 "mov %4, %%"REG_b" \n\t"
1522 "push %%"REG_BP" \n\t"
1523 YSCALEYUV2RGB1b(%%REGBP, %5)
c255994b
RP
1524 "pxor %%mm7, %%mm7 \n\t"
1525 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
3164d25e
CS
1526 "pop %%"REG_BP" \n\t"
1527 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
14014d47 1528
3164d25e
CS
1529 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1530 "a" (&c->redDither)
14014d47
MN
1531 );
1532 return;
1533 case PIX_FMT_RGB555:
7ad6469e 1534 __asm__ volatile(
c255994b
RP
1535 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1536 "mov %4, %%"REG_b" \n\t"
1537 "push %%"REG_BP" \n\t"
1538 YSCALEYUV2RGB1b(%%REGBP, %5)
1539 "pxor %%mm7, %%mm7 \n\t"
1540 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
497d4f99 1541#ifdef DITHER1XBPP
c255994b
RP
1542 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1543 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1544 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d 1545#endif
c255994b
RP
1546 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1547 "pop %%"REG_BP" \n\t"
1548 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1549
c255994b
RP
1550 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1551 "a" (&c->redDither)
14014d47
MN
1552 );
1553 return;
1554 case PIX_FMT_RGB565:
7ad6469e 1555 __asm__ volatile(
c255994b
RP
1556 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1557 "mov %4, %%"REG_b" \n\t"
1558 "push %%"REG_BP" \n\t"
1559 YSCALEYUV2RGB1b(%%REGBP, %5)
1560 "pxor %%mm7, %%mm7 \n\t"
1561 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
497d4f99 1562#ifdef DITHER1XBPP
c255994b
RP
1563 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1564 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1565 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
2da0d70d
DB
1566#endif
1567
c255994b
RP
1568 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1569 "pop %%"REG_BP" \n\t"
1570 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
2da0d70d 1571
c255994b
RP
1572 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1573 "a" (&c->redDither)
14014d47
MN
1574 );
1575 return;
1576 case PIX_FMT_YUYV422:
7ad6469e 1577 __asm__ volatile(
c255994b
RP
1578 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1579 "mov %4, %%"REG_b" \n\t"
1580 "push %%"REG_BP" \n\t"
1581 YSCALEYUV2PACKED1b(%%REGBP, %5)
1582 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1583 "pop %%"REG_BP" \n\t"
1584 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
14014d47 1585
c255994b
RP
1586 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1587 "a" (&c->redDither)
14014d47
MN
1588 );
1589 return;
1590 }
2da0d70d
DB
1591 }
1592 }
94daf2e9 1593#endif /* COMPILE_TEMPLATE_MMX */
/* C fallback: same uvalpha split as the MMX path above */
dd68318c 1594 if (uvalpha < 2048) {
6858492e 1595 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
dd68318c 1596 } else {
6858492e 1597 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
2da0d70d 1598 }
d604bab9
MN
1599}
1600
8a322796 1601//FIXME yuy2* can read up to 7 samples too much
6ff0ad6b 1602
/**
 * Copy every second byte of src, starting with byte 0, into dst
 * (dst[i] = src[2*i] for i < width).
 *
 * MMX path: REG_a counts from -width up to zero in steps of 8 while indexing
 * from the end of both buffers. Each iteration reads 16 source bytes, masks
 * them with bm01010101 to keep the low byte of every 16-bit word and stores
 * 8 packed bytes. The loop body runs at least once and always handles 8
 * outputs per pass, so widths that are not a multiple of 8 read and write
 * past width.
 * The last parameter is not used.
 */
7ac40327 1603static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1e621b18 1604{
94daf2e9 1605#if COMPILE_TEMPLATE_MMX
7ad6469e 1606 __asm__ volatile(
c255994b
RP
1607 "movq "MANGLE(bm01010101)", %%mm2 \n\t"
1608 "mov %0, %%"REG_a" \n\t"
1609 "1: \n\t"
1610 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1611 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1612 "pand %%mm2, %%mm0 \n\t"
1613 "pand %%mm2, %%mm1 \n\t"
1614 "packuswb %%mm1, %%mm0 \n\t"
1615 "movq %%mm0, (%2, %%"REG_a") \n\t"
1616 "add $8, %%"REG_a" \n\t"
1617 " js 1b \n\t"
1618 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1619 : "%"REG_a
2da0d70d 1620 );
1e621b18 1621#else
2da0d70d
DB
1622 int i;
1623 for (i=0; i<width; i++)
1624 dst[i]= src[2*i];
1e621b18
MN
1625#endif
1626}
1627
/**
 * Split the odd bytes of src1 into two planes:
 * dstU[i] = src1[4*i + 1], dstV[i] = src1[4*i + 3] for i < width.
 *
 * MMX path: REG_a counts from -width up to zero in steps of 4. Each
 * iteration reads 16 source bytes, keeps the high byte of every 16-bit word
 * (psrlw $8), then separates the even and odd bytes of that result with the
 * bm01010101 mask and a second shift, and stores 4 bytes to each plane. The
 * loop body runs at least once and always handles 4 outputs per pass.
 * src2 is only compared against src1 by the assert at the end; the last
 * parameter is not used.
 */
7ac40327 1628static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1e621b18 1629{
94daf2e9 1630#if COMPILE_TEMPLATE_MMX
7ad6469e 1631 __asm__ volatile(
c255994b
RP
1632 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1633 "mov %0, %%"REG_a" \n\t"
1634 "1: \n\t"
1635 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
1636 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
1637 "psrlw $8, %%mm0 \n\t"
1638 "psrlw $8, %%mm1 \n\t"
1639 "packuswb %%mm1, %%mm0 \n\t"
1640 "movq %%mm0, %%mm1 \n\t"
1641 "psrlw $8, %%mm0 \n\t"
1642 "pand %%mm4, %%mm1 \n\t"
1643 "packuswb %%mm0, %%mm0 \n\t"
1644 "packuswb %%mm1, %%mm1 \n\t"
/* mm0 (the shifted copy) goes to operand %3 = dstV, mm1 (the masked copy) to operand %2 = dstU */
1645 "movd %%mm0, (%3, %%"REG_a") \n\t"
1646 "movd %%mm1, (%2, %%"REG_a") \n\t"
1647 "add $4, %%"REG_a" \n\t"
1648 " js 1b \n\t"
1649 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1650 : "%"REG_a
2da0d70d 1651 );
1e621b18 1652#else
2da0d70d 1653 int i;
dd68318c 1654 for (i=0; i<width; i++) {
2da0d70d
DB
1655 dstU[i]= src1[4*i + 1];
1656 dstV[i]= src1[4*i + 3];
1657 }
1658#endif
1659 assert(src1 == src2);
1e621b18
MN
1660}
1661
de1275d5
MN
/**
 * Reduce two lines of 2-byte samples to 8 bits by keeping the second byte
 * of every pair (presumably the most significant byte of little-endian
 * 16-bit samples -- confirm against the callers).
 *
 * C fallback: dstU[i] = src1[2*i + 1], dstV[i] = src2[2*i + 1].
 * MMX path: produces 8 U and 8 V bytes per loop iteration.
 *
 * @param dstU   output U line
 * @param dstV   output V line
 * @param src1   input line for U
 * @param src2   input line for V
 * @param width  number of samples to produce per plane
 * @param unused not referenced by this function
 */
1662static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1663{
94daf2e9 1664#if COMPILE_TEMPLATE_MMX
de1275d5 1665 __asm__ volatile(
c255994b
RP
1666 "mov %0, %%"REG_a" \n\t"
1667 "1: \n\t"
1668 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1669 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1670 "movq (%2, %%"REG_a",2), %%mm2 \n\t"
1671 "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
1672 "psrlw $8, %%mm0 \n\t"
1673 "psrlw $8, %%mm1 \n\t"
1674 "psrlw $8, %%mm2 \n\t"
1675 "psrlw $8, %%mm3 \n\t"
1676 "packuswb %%mm1, %%mm0 \n\t"
1677 "packuswb %%mm3, %%mm2 \n\t"
1678 "movq %%mm0, (%3, %%"REG_a") \n\t"
1679 "movq %%mm2, (%4, %%"REG_a") \n\t"
1680 "add $8, %%"REG_a" \n\t"
1681 " js 1b \n\t"
1682 : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1683 : "%"REG_a
de1275d5
MN
1684 );
1685#else
1686 int i;
dd68318c 1687 for (i=0; i<width; i++) {
de1275d5
MN
1688 dstU[i]= src1[2*i + 1];
1689 dstV[i]= src2[2*i + 1];
1690 }
1691#endif
1692}
1693
4cf16bbe
DB
1694/* This is almost identical to the previous, and exists only because
1695 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/**
 * Extract the luma bytes from one line of packed 2-bytes-per-pixel data in
 * which luma is the second byte of every pair.
 *
 * C fallback: dst[i] = src[2*i + 1] for i in [0, width).
 * MMX path: shifts each 16-bit word right by 8 and packs, producing 8 output
 * bytes per loop iteration.
 *
 * @param dst    output line, one byte per sample
 * @param src    packed input line
 * @param width  number of samples to produce
 * @param unused not referenced by this function
 */
7ac40327 1696static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
7322a67c 1697{
94daf2e9 1698#if COMPILE_TEMPLATE_MMX
7ad6469e 1699 __asm__ volatile(
c255994b
RP
1700 "mov %0, %%"REG_a" \n\t"
1701 "1: \n\t"
1702 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1703 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1704 "psrlw $8, %%mm0 \n\t"
1705 "psrlw $8, %%mm1 \n\t"
1706 "packuswb %%mm1, %%mm0 \n\t"
1707 "movq %%mm0, (%2, %%"REG_a") \n\t"
1708 "add $8, %%"REG_a" \n\t"
1709 " js 1b \n\t"
1710 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1711 : "%"REG_a
2da0d70d 1712 );
7322a67c 1713#else
2da0d70d
DB
1714 int i;
1715 for (i=0; i<width; i++)
1716 dst[i]= src[2*i+1];
7322a67c
MN
1717#endif
1718}
1719
/**
 * Extract the two chroma planes from one line of packed 4-bytes-per-pair
 * data in which chroma occupies the even bytes.
 *
 * C fallback: dstU[i] = src1[4*i + 0], dstV[i] = src1[4*i + 2].
 * MMX path: produces 4 U and 4 V bytes per loop iteration.
 * Only src1 is read; src2 is merely asserted to be equal to src1.
 *
 * @param dstU   output U line
 * @param dstV   output V line
 * @param src1   packed input line
 * @param src2   must equal src1
 * @param width  number of chroma samples to produce per plane
 * @param unused not referenced by this function
 */
7ac40327 1720static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
7322a67c 1721{
94daf2e9 1722#if COMPILE_TEMPLATE_MMX
7ad6469e 1723 __asm__ volatile(
c255994b
RP
1724 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1725 "mov %0, %%"REG_a" \n\t"
1726 "1: \n\t"
1727 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
1728 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
1729 "pand %%mm4, %%mm0 \n\t"
1730 "pand %%mm4, %%mm1 \n\t"
1731 "packuswb %%mm1, %%mm0 \n\t"
1732 "movq %%mm0, %%mm1 \n\t"
1733 "psrlw $8, %%mm0 \n\t"
1734 "pand %%mm4, %%mm1 \n\t"
1735 "packuswb %%mm0, %%mm0 \n\t"
1736 "packuswb %%mm1, %%mm1 \n\t"
1737 "movd %%mm0, (%3, %%"REG_a") \n\t"
1738 "movd %%mm1, (%2, %%"REG_a") \n\t"
1739 "add $4, %%"REG_a" \n\t"
1740 " js 1b \n\t"
1741 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1742 : "%"REG_a
2da0d70d 1743 );
7322a67c 1744#else
2da0d70d 1745 int i;
dd68318c 1746 for (i=0; i<width; i++) {
2da0d70d
DB
1747 dstU[i]= src1[4*i + 0];
1748 dstV[i]= src1[4*i + 2];
1749 }
1750#endif
1751 assert(src1 == src2);
7322a67c
MN
1752}
1753
de1275d5
MN
/**
 * Reduce two lines of 2-byte samples to 8 bits by keeping the first byte of
 * every pair (presumably the most significant byte of big-endian 16-bit
 * samples -- confirm against the callers).
 *
 * C fallback: dstU[i] = src1[2*i], dstV[i] = src2[2*i].
 * MMX path: masks with bm01010101 and packs, producing 8 U and 8 V bytes per
 * loop iteration.
 *
 * @param dstU   output U line
 * @param dstV   output V line
 * @param src1   input line for U
 * @param src2   input line for V
 * @param width  number of samples to produce per plane
 * @param unused not referenced by this function
 */
1754static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1755{
94daf2e9 1756#if COMPILE_TEMPLATE_MMX
de1275d5 1757 __asm__ volatile(
c255994b
RP
1758 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1759 "mov %0, %%"REG_a" \n\t"
1760 "1: \n\t"
1761 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1762 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1763 "movq (%2, %%"REG_a",2), %%mm2 \n\t"
1764 "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
1765 "pand %%mm4, %%mm0 \n\t"
1766 "pand %%mm4, %%mm1 \n\t"
1767 "pand %%mm4, %%mm2 \n\t"
1768 "pand %%mm4, %%mm3 \n\t"
1769 "packuswb %%mm1, %%mm0 \n\t"
1770 "packuswb %%mm3, %%mm2 \n\t"
1771 "movq %%mm0, (%3, %%"REG_a") \n\t"
1772 "movq %%mm2, (%4, %%"REG_a") \n\t"
1773 "add $8, %%"REG_a" \n\t"
1774 " js 1b \n\t"
1775 : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1776 : "%"REG_a
de1275d5
MN
1777 );
1778#else
1779 int i;
dd68318c 1780 for (i=0; i<width; i++) {
de1275d5
MN
1781 dstU[i]= src1[2*i];
1782 dstV[i]= src2[2*i];
1783 }
1784#endif
1785}
1786
94daf2e9 1787#if COMPILE_TEMPLATE_MMX
/**
 * MMX conversion of one line of packed 24-bit RGB/BGR pixels to luma.
 *
 * The coefficient pair is chosen by srcFormat: the ff_bgr24toY{1,2}Coeff
 * tables when srcFormat is PIX_FMT_BGR24, the ff_rgb24toY{1,2}Coeff tables
 * otherwise. Each loop iteration consumes 12 source bytes and stores 4
 * output bytes.
 *
 * NOTE(review): the coefficients are loaded into mm5/mm6 by a separate asm
 * statement and consumed by the main loop below; nothing in the operand or
 * clobber lists tells the compiler about that dependency.
 *
 * @param dst       output luma line
 * @param src       packed 3-bytes-per-pixel input line
 * @param width     number of pixels to convert
 * @param srcFormat pixel format selecting the coefficient tables
 */
7ac40327 1788static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat)
dfb09bd1
MN
1789{
1790
dd68318c 1791 if(srcFormat == PIX_FMT_BGR24) {
7ad6469e 1792 __asm__ volatile(
ff9a056d
MN
1793 "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
1794 "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
1795 :
dfb09bd1 1796 );
dd68318c 1797 } else {
7ad6469e 1798 __asm__ volatile(
ff9a056d
MN
1799 "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
1800 "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
1801 :
dfb09bd1
MN
1802 );
1803 }
1804
/* Main loop: src (%0) is advanced directly by 12 bytes per iteration, while
 * the destination is indexed by a counter running from -width up to 0. */
7ad6469e 1805 __asm__ volatile(
dfb09bd1
MN
1806 "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"
1807 "mov %2, %%"REG_a" \n\t"
1808 "pxor %%mm7, %%mm7 \n\t"
1809 "1: \n\t"
1810 PREFETCH" 64(%0) \n\t"
1811 "movd (%0), %%mm0 \n\t"
1812 "movd 2(%0), %%mm1 \n\t"
1813 "movd 6(%0), %%mm2 \n\t"
1814 "movd 8(%0), %%mm3 \n\t"
1815 "add $12, %0 \n\t"
1816 "punpcklbw %%mm7, %%mm0 \n\t"
1817 "punpcklbw %%mm7, %%mm1 \n\t"
1818 "punpcklbw %%mm7, %%mm2 \n\t"
1819 "punpcklbw %%mm7, %%mm3 \n\t"
1820 "pmaddwd %%mm5, %%mm0 \n\t"
1821 "pmaddwd %%mm6, %%mm1 \n\t"
1822 "pmaddwd %%mm5, %%mm2 \n\t"
1823 "pmaddwd %%mm6, %%mm3 \n\t"
1824 "paddd %%mm1, %%mm0 \n\t"
1825 "paddd %%mm3, %%mm2 \n\t"
1826 "paddd %%mm4, %%mm0 \n\t"
1827 "paddd %%mm4, %%mm2 \n\t"
1828 "psrad $15, %%mm0 \n\t"
1829 "psrad $15, %%mm2 \n\t"
1830 "packssdw %%mm2, %%mm0 \n\t"
1831 "packuswb %%mm0, %%mm0 \n\t"
1832 "movd %%mm0, (%1, %%"REG_a") \n\t"
1833 "add $4, %%"REG_a" \n\t"
1834 " js 1b \n\t"
1835 : "+r" (src)
d0ce212a 1836 : "r" (dst+width), "g" ((x86_reg)-width)
dfb09bd1 1837 : "%"REG_a
2da0d70d 1838 );
dfb09bd1
MN
1839}
1840
/**
 * MMX conversion of one line of packed 24-bit RGB/BGR pixels to chroma.
 *
 * The coefficients come from ff_bgr24toUV[srcFormat == PIX_FMT_RGB24], which
 * is passed as memory operand %4 and read at byte offsets 0, 8, 16 and 24.
 * Each loop iteration consumes 12 source bytes and stores 4 bytes to each
 * of dstU and dstV.
 *
 * @param dstU      output U line
 * @param dstV      output V line
 * @param src       packed 3-bytes-per-pixel input line
 * @param width     number of pixels to convert
 * @param srcFormat pixel format selecting the coefficient row
 */
7ac40327 1841static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, int srcFormat)
dfb09bd1 1842{
7ad6469e 1843 __asm__ volatile(
dfb09bd1
MN
1844 "movq 24+%4, %%mm6 \n\t"
1845 "mov %3, %%"REG_a" \n\t"
1846 "pxor %%mm7, %%mm7 \n\t"
1847 "1: \n\t"
1848 PREFETCH" 64(%0) \n\t"
1849 "movd (%0), %%mm0 \n\t"
1850 "movd 2(%0), %%mm1 \n\t"
1851 "punpcklbw %%mm7, %%mm0 \n\t"
1852 "punpcklbw %%mm7, %%mm1 \n\t"
1853 "movq %%mm0, %%mm2 \n\t"
1854 "movq %%mm1, %%mm3 \n\t"
1855 "pmaddwd %4, %%mm0 \n\t"
1856 "pmaddwd 8+%4, %%mm1 \n\t"
1857 "pmaddwd 16+%4, %%mm2 \n\t"
1858 "pmaddwd %%mm6, %%mm3 \n\t"
1859 "paddd %%mm1, %%mm0 \n\t"
1860 "paddd %%mm3, %%mm2 \n\t"
1861
1862 "movd 6(%0), %%mm1 \n\t"
1863 "movd 8(%0), %%mm3 \n\t"
1864 "add $12, %0 \n\t"
1865 "punpcklbw %%mm7, %%mm1 \n\t"
1866 "punpcklbw %%mm7, %%mm3 \n\t"
1867 "movq %%mm1, %%mm4 \n\t"
1868 "movq %%mm3, %%mm5 \n\t"
1869 "pmaddwd %4, %%mm1 \n\t"
1870 "pmaddwd 8+%4, %%mm3 \n\t"
1871 "pmaddwd 16+%4, %%mm4 \n\t"
1872 "pmaddwd %%mm6, %%mm5 \n\t"
1873 "paddd %%mm3, %%mm1 \n\t"
1874 "paddd %%mm5, %%mm4 \n\t"
1875
1876 "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
1877 "paddd %%mm3, %%mm0 \n\t"
1878 "paddd %%mm3, %%mm2 \n\t"
1879 "paddd %%mm3, %%mm1 \n\t"
1880 "paddd %%mm3, %%mm4 \n\t"
1881 "psrad $15, %%mm0 \n\t"
1882 "psrad $15, %%mm2 \n\t"
1883 "psrad $15, %%mm1 \n\t"
1884 "psrad $15, %%mm4 \n\t"
1885 "packssdw %%mm1, %%mm0 \n\t"
1886 "packssdw %%mm4, %%mm2 \n\t"
1887 "packuswb %%mm0, %%mm0 \n\t"
1888 "packuswb %%mm2, %%mm2 \n\t"
1889 "movd %%mm0, (%1, %%"REG_a") \n\t"
1890 "movd %%mm2, (%2, %%"REG_a") \n\t"
1891 "add $4, %%"REG_a" \n\t"
1892 " js 1b \n\t"
1893 : "+r" (src)
d0ce212a 1894 : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
dfb09bd1
MN
1895 : "%"REG_a
1896 );
1897}
1898#endif
1899
7ac40327 1900static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
dfb09bd1 1901{
94daf2e9 1902#if COMPILE_TEMPLATE_MMX
a35acd7f 1903 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
1e621b18 1904#else
2da0d70d 1905 int i;
dd68318c 1906 for (i=0; i<width; i++) {
2da0d70d
DB
1907 int b= src[i*3+0];
1908 int g= src[i*3+1];
1909 int r= src[i*3+2];
1e621b18 1910
e5091488 1911 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 1912 }
94daf2e9 1913#endif /* COMPILE_TEMPLATE_MMX */
1e621b18
MN
1914}
1915
7ac40327 1916static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1e621b18 1917{
94daf2e9 1918#if COMPILE_TEMPLATE_MMX
a35acd7f 1919 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
1e621b18 1920#else
2da0d70d 1921 int i;
dd68318c 1922 for (i=0; i<width; i++) {
dfb09bd1
MN
1923 int b= src1[3*i + 0];
1924 int g= src1[3*i + 1];
1925 int r= src1[3*i + 2];
2da0d70d 1926
dfb09bd1
MN
1927 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1928 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2da0d70d 1929 }
94daf2e9 1930#endif /* COMPILE_TEMPLATE_MMX */
2da0d70d 1931 assert(src1 == src2);
1e621b18
MN
1932}
1933
7ac40327 1934static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2f60f629
MN
1935{
1936 int i;
dd68318c 1937 for (i=0; i<width; i++) {
2f60f629
MN
1938 int b= src1[6*i + 0] + src1[6*i + 3];
1939 int g= src1[6*i + 1] + src1[6*i + 4];
1940 int r= src1[6*i + 2] + src1[6*i + 5];
1941
1942 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1943 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1944 }
1945 assert(src1 == src2);
1946}
1947
7ac40327 1948static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
a861d4d7 1949{
94daf2e9 1950#if COMPILE_TEMPLATE_MMX
a35acd7f 1951 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
dfb09bd1 1952#else
2da0d70d 1953 int i;
dd68318c 1954 for (i=0; i<width; i++) {
2da0d70d
DB
1955 int r= src[i*3+0];
1956 int g= src[i*3+1];
1957 int b= src[i*3+2];
1958
e5091488 1959 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 1960 }
dfb09bd1 1961#endif
a861d4d7
MN
1962}
1963
7ac40327 1964static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
a861d4d7 1965{
94daf2e9 1966#if COMPILE_TEMPLATE_MMX
5155b839 1967 assert(src1==src2);
a35acd7f 1968 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
dfb09bd1 1969#else
5155b839
DB
1970 int i;
1971 assert(src1==src2);
dd68318c 1972 for (i=0; i<width; i++) {
dfb09bd1
MN
1973 int r= src1[3*i + 0];
1974 int g= src1[3*i + 1];
1975 int b= src1[3*i + 2];
2da0d70d 1976
dfb09bd1
MN
1977 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1978 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2da0d70d 1979 }
dfb09bd1 1980#endif
a861d4d7
MN
1981}
1982
7ac40327 1983static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2f60f629
MN
1984{
1985 int i;
1986 assert(src1==src2);
dd68318c 1987 for (i=0; i<width; i++) {
e09d7eef
MN
1988 int r= src1[6*i + 0] + src1[6*i + 3];
1989 int g= src1[6*i + 1] + src1[6*i + 4];
1990 int b= src1[6*i + 2] + src1[6*i + 5];
2f60f629
MN
1991
1992 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1993 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1994 }
1995}
1996
1e621b18 1997
8a322796 1998// bilinear / bicubic scaling
7ac40327
RP
/**
 * Horizontally filter one line of 8-bit samples into 16-bit intermediates.
 *
 * Plain C path: for every output index i the filterSize source bytes
 * starting at src[filterPos[i]] are multiplied by
 * filter[filterSize*i .. filterSize*i + filterSize-1], summed, shifted right
 * by 7 and clamped from above to (1<<15)-1.
 *
 * MMX path: filterSize must be a positive multiple of 4 (asserted). There
 * are dedicated loops for filterSize 4 and 8 and a generic loop for any
 * other size; each outer iteration stores two output samples.
 *
 * AltiVec builds forward all arguments to hScale_altivec_real().
 *
 * @param dst        output line of 16-bit samples
 * @param dstW       number of output samples
 * @param src        input line of 8-bit samples
 * @param srcW       only forwarded to the AltiVec implementation
 * @param xInc       only forwarded to the AltiVec implementation
 * @param filter     filter coefficients, filterSize entries per output sample
 * @param filterPos  first source index for every output sample
 * @param filterSize number of taps per output sample
 */
1999static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
2000 const int16_t *filter, const int16_t *filterPos, long filterSize)
2ff198c1 2001{
94daf2e9 2002#if COMPILE_TEMPLATE_MMX
2da0d70d 2003 assert(filterSize % 4 == 0 && filterSize>0);
dd68318c 2004 if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
/* counter is a negative byte offset into dst that counts up to zero; the
 * pointers are moved to their ends so that (pointer, counter) addresses
 * the start of each array. */
d0ce212a 2005 x86_reg counter= -2*dstW;
2da0d70d
DB
2006 filter-= counter*2;
2007 filterPos-= counter/2;
2008 dst-= counter/2;
7ad6469e 2009 __asm__ volatile(
83c89c78 2010#if defined(PIC)
c255994b 2011 "push %%"REG_b" \n\t"
2da0d70d 2012#endif
c255994b
RP
2013 "pxor %%mm7, %%mm7 \n\t"
2014 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2015 "mov %%"REG_a", %%"REG_BP" \n\t"
2016 ASMALIGN(4)
2017 "1: \n\t"
2018 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2019 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
2020 "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
2021 "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
2022 "movd (%3, %%"REG_a"), %%mm0 \n\t"
2023 "movd (%3, %%"REG_b"), %%mm2 \n\t"
2024 "punpcklbw %%mm7, %%mm0 \n\t"
2025 "punpcklbw %%mm7, %%mm2 \n\t"
2026 "pmaddwd %%mm1, %%mm0 \n\t"
2027 "pmaddwd %%mm2, %%mm3 \n\t"
2028 "movq %%mm0, %%mm4 \n\t"
2029 "punpckldq %%mm3, %%mm0 \n\t"
2030 "punpckhdq %%mm3, %%mm4 \n\t"
2031 "paddd %%mm4, %%mm0 \n\t"
2032 "psrad $7, %%mm0 \n\t"
2033 "packssdw %%mm0, %%mm0 \n\t"
2034 "movd %%mm0, (%4, %%"REG_BP") \n\t"
2035 "add $4, %%"REG_BP" \n\t"
2036 " jnc 1b \n\t"
2037
2038 "pop %%"REG_BP" \n\t"
83c89c78 2039#if defined(PIC)
c255994b 2040 "pop %%"REG_b" \n\t"
83c89c78 2041#endif
c255994b
RP
2042 : "+a" (counter)
2043 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
83c89c78 2044#if !defined(PIC)
c255994b 2045 : "%"REG_b
2da0d70d
DB
2046#endif
2047 );
dd68318c 2048 } else if (filterSize==8) {
/* Same scheme as the 4-tap loop, with two groups of four taps per output. */
d0ce212a 2049 x86_reg counter= -2*dstW;
2da0d70d
DB
2050 filter-= counter*4;
2051 filterPos-= counter/2;
2052 dst-= counter/2;
7ad6469e 2053 __asm__ volatile(
83c89c78 2054#if defined(PIC)
c255994b 2055 "push %%"REG_b" \n\t"
2da0d70d 2056#endif
c255994b
RP
2057 "pxor %%mm7, %%mm7 \n\t"
2058 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2059 "mov %%"REG_a", %%"REG_BP" \n\t"
2060 ASMALIGN(4)
2061 "1: \n\t"
2062 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2063 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
2064 "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
2065 "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
2066 "movd (%3, %%"REG_a"), %%mm0 \n\t"
2067 "movd (%3, %%"REG_b"), %%mm2 \n\t"
2068 "punpcklbw %%mm7, %%mm0 \n\t"
2069 "punpcklbw %%mm7, %%mm2 \n\t"
2070 "pmaddwd %%mm1, %%mm0 \n\t"
2071 "pmaddwd %%mm2, %%mm3 \n\t"
2072
2073 "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
2074 "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
2075 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
2076 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
2077 "punpcklbw %%mm7, %%mm4 \n\t"
2078 "punpcklbw %%mm7, %%mm2 \n\t"
2079 "pmaddwd %%mm1, %%mm4 \n\t"
2080 "pmaddwd %%mm2, %%mm5 \n\t"
2081 "paddd %%mm4, %%mm0 \n\t"
2082 "paddd %%mm5, %%mm3 \n\t"
2083 "movq %%mm0, %%mm4 \n\t"
2084 "punpckldq %%mm3, %%mm0 \n\t"
2085 "punpckhdq %%mm3, %%mm4 \n\t"
2086 "paddd %%mm4, %%mm0 \n\t"
2087 "psrad $7, %%mm0 \n\t"
2088 "packssdw %%mm0, %%mm0 \n\t"
2089 "movd %%mm0, (%4, %%"REG_BP") \n\t"
2090 "add $4, %%"REG_BP" \n\t"
2091 " jnc 1b \n\t"
2092
2093 "pop %%"REG_BP" \n\t"
83c89c78 2094#if defined(PIC)
c255994b 2095 "pop %%"REG_b" \n\t"
83c89c78 2096#endif
c255994b
RP
2097 : "+a" (counter)
2098 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
83c89c78 2099#if !defined(PIC)
c255994b 2100 : "%"REG_b
2da0d70d
DB
2101#endif
2102 );
dd68318c 2103 } else {
/* Generic tap count: the inner loop (label 2) walks four taps at a time
 * until the source cursor reaches offset, i.e. src + filterSize.
 * NOTE(review): offset is declared without const although src is const. */
2da0d70d 2104 uint8_t *offset = src+filterSize;
d0ce212a 2105 x86_reg counter= -2*dstW;
2da0d70d
DB
2106 //filter-= counter*filterSize/2;
2107 filterPos-= counter/2;
2108 dst-= counter/2;
7ad6469e 2109 __asm__ volatile(
c255994b
RP
2110 "pxor %%mm7, %%mm7 \n\t"
2111 ASMALIGN(4)
2112 "1: \n\t"
2113 "mov %2, %%"REG_c" \n\t"
2114 "movzwl (%%"REG_c", %0), %%eax \n\t"
2115 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
2116 "mov %5, %%"REG_c" \n\t"
2117 "pxor %%mm4, %%mm4 \n\t"
2118 "pxor %%mm5, %%mm5 \n\t"
2119 "2: \n\t"
2120 "movq (%1), %%mm1 \n\t"
2121 "movq (%1, %6), %%mm3 \n\t"
2122 "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
2123 "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
2124 "punpcklbw %%mm7, %%mm0 \n\t"
2125 "punpcklbw %%mm7, %%mm2 \n\t"
2126 "pmaddwd %%mm1, %%mm0 \n\t"
2127 "pmaddwd %%mm2, %%mm3 \n\t"
2128 "paddd %%mm3, %%mm5 \n\t"
2129 "paddd %%mm0, %%mm4 \n\t"
2130 "add $8, %1 \n\t"
2131 "add $4, %%"REG_c" \n\t"
2132 "cmp %4, %%"REG_c" \n\t"
2133 " jb 2b \n\t"
2134 "add %6, %1 \n\t"
2135 "movq %%mm4, %%mm0 \n\t"
2136 "punpckldq %%mm5, %%mm4 \n\t"
2137 "punpckhdq %%mm5, %%mm0 \n\t"
2138 "paddd %%mm0, %%mm4 \n\t"
2139 "psrad $7, %%mm4 \n\t"
2140 "packssdw %%mm4, %%mm4 \n\t"
2141 "mov %3, %%"REG_a" \n\t"
2142 "movd %%mm4, (%%"REG_a", %0) \n\t"
2143 "add $4, %0 \n\t"
2144 " jnc 1b \n\t"
2145
2146 : "+r" (counter), "+r" (filter)
2147 : "m" (filterPos), "m" (dst), "m"(offset),
2148 "m" (src), "r" ((x86_reg)filterSize*2)
2149 : "%"REG_a, "%"REG_c, "%"REG_d
2da0d70d
DB
2150 );
2151 }
077ea8a7 2152#else
94daf2e9 2153#if COMPILE_TEMPLATE_ALTIVEC
2da0d70d 2154 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
8c266f0c 2155#else
2da0d70d 2156 int i;
dd68318c 2157 for (i=0; i<dstW; i++) {
2da0d70d
DB
2158 int j;
2159 int srcPos= filterPos[i];
2160 int val=0;
2161 //printf("filterPos: %d\n", filterPos[i]);
dd68318c 2162 for (j=0; j<filterSize; j++) {
2da0d70d
DB
2163 //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
2164 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2165 }
2166 //filter += hFilterSize;
881c4294 2167 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
2da0d70d
DB
2168 //dst[i] = val>>7;
2169 }
94daf2e9
RP
2170#endif /* COMPILE_ALTIVEC */
2171#endif /* COMPILE_MMX */
077ea8a7 2172}
392b6567 2173
18c61752
RP
/*
 * Shared asm fragment of the plain x86 fast bilinear scalers.
 * On entry edi holds src[xx], esi holds src[xx+1] and ecx holds xalpha.
 * On exit esi holds the interpolated value shifted right by 9, and the
 * destination-index register has been reloaded from operand %1 of the
 * enclosing asm statement.
 */
2174#define FAST_BILINEAR_X86 \
2175 "subl %%edi, %%esi \n\t" /* src[xx+1] - src[xx] */ \
2176 "imull %%ecx, %%esi \n\t" /* (src[xx+1] - src[xx])*xalpha */ \
2177 "shll $16, %%edi \n\t" \
2178 "addl %%edi, %%esi \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */ \
2179 "mov %1, %%"REG_D"\n\t" \
2180 "shrl $9, %%esi \n\t" \
2181
392b6567 2182static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
7ac40327 2183 int dstWidth, const uint8_t *src, int srcW,
392b6567
RP
2184 int xInc)
2185{
2186 int i;
2187 unsigned int xpos=0;
dd68318c 2188 for (i=0;i<dstWidth;i++) {
392b6567
RP
2189 register unsigned int xx=xpos>>16;
2190 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2191 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2192 xpos+=xInc;
2193 }
2194}
2195
2ff198c1 2196 // *** horizontal scale Y line to temp buffer
7ac40327
RP
/**
 * Horizontally scale one luma line (or one alpha line when isAlpha is set)
 * into the 16-bit intermediate buffer dst.
 *
 * Steps, in order:
 *  - src is offset for some packed formats (by 3 for RGB32/BGR32 alpha, by
 *    ALT32_CORR for the RGB32_1/BGR32_1 luma case, by 1 for RGB48LE);
 *  - if the context provides an input converter (hascale_internal for alpha,
 *    hyscale_internal otherwise) the line is first converted into
 *    formatConvBuffer and scaling continues from there;
 *  - unless SWS_FAST_BILINEAR is requested (and, in MMX builds, unless
 *    canMMX2BeUsed is false) the line is filtered through c->hScale;
 *    otherwise a fast bilinear path is used: run-time generated MMX2 code,
 *    plain x86 asm, or c->hyscale_fast;
 *  - for non-alpha lines, when source and destination ranges differ and the
 *    destination is neither RGB nor BGR, the samples are range-converted.
 *
 * @param c                scaler context
 * @param dst              output line of 16-bit samples
 * @param dstWidth         number of output samples
 * @param src              input line
 * @param srcW             number of input samples
 * @param xInc             16.16 fixed-point source step per output sample
 * @param flags            scaler flags (SWS_FAST_BILINEAR is tested here)
 * @param hLumFilter       horizontal filter coefficients
 * @param hLumFilterPos    horizontal filter positions
 * @param hLumFilterSize   horizontal filter tap count
 * @param srcFormat        source pixel format
 * @param formatConvBuffer scratch line for the input converter
 * @param pal              palette handed to the input converter
 * @param isAlpha          non-zero when scaling the alpha plane
 */
2197static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
2198 int flags, const int16_t *hLumFilter,
2199 const int16_t *hLumFilterPos, int hLumFilterSize,
95b5770b
RP
2200 int srcFormat, uint8_t *formatConvBuffer,
2201 uint32_t *pal, int isAlpha)
077ea8a7 2202{
fdf70cc5
RP
2203 int32_t av_unused *mmx2FilterPos = c->lumMmx2FilterPos;
2204 int16_t av_unused *mmx2Filter = c->lumMmx2Filter;
2205 int av_unused canMMX2BeUsed = c->canMMX2BeUsed;
bcdedf67 2206 void av_unused *mmx2FilterCode= c->lumMmx2FilterCode;
39e5f87b 2207 void (*internal_func)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->hascale_internal : c->hyscale_internal;
95b5770b 2208
40fa5140
RP
2209 if (isAlpha) {
2210 if (srcFormat == PIX_FMT_RGB32 || srcFormat == PIX_FMT_BGR32 )
2211 src += 3;
2212 } else {
2213 if (srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
2214 src += ALT32_CORR;
9990e426 2215 }
40fa5140 2216
e8417235
KS
2217 if (srcFormat == PIX_FMT_RGB48LE)
2218 src++;
2219
39e5f87b
CS
2220 if (internal_func) {
2221 internal_func(formatConvBuffer, src, srcW, pal);
3d05e078
MN
2222 src= formatConvBuffer;
2223 }
1e621b18 2224
94daf2e9 2225#if COMPILE_TEMPLATE_MMX
8a322796 2226 // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
2da0d70d 2227 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
e3d2500f 2228#else
2da0d70d 2229 if (!(flags&SWS_FAST_BILINEAR))
e3d2500f 2230#endif
077ea8a7 2231 {
40fa5140 2232 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
dd68318c 2233 } else { // fast bilinear upscale / crap downscale
57f9a560 2234#if ARCH_X86 && CONFIG_GPL
94daf2e9 2235#if COMPILE_TEMPLATE_MMX2
2da0d70d 2236 int i;
83c89c78 2237#if defined(PIC)
934626a9 2238 DECLARE_ALIGNED(8, uint64_t, ebxsave);
83c89c78 2239#endif
dd68318c 2240 if (canMMX2BeUsed) {
/* Calls the run-time generated filter code (mmx2FilterCode) eight times.
 * In PIC builds the b register is saved to ebxsave and restored by hand
 * instead of being listed as a clobber. */
7ad6469e 2241 __asm__ volatile(
83c89c78 2242#if defined(PIC)
c255994b 2243 "mov %%"REG_b", %5 \n\t"
2da0d70d 2244#endif
c255994b
RP
2245 "pxor %%mm7, %%mm7 \n\t"
2246 "mov %0, %%"REG_c" \n\t"
2247 "mov %1, %%"REG_D" \n\t"
2248 "mov %2, %%"REG_d" \n\t"
2249 "mov %3, %%"REG_b" \n\t"
2250 "xor %%"REG_a", %%"REG_a" \n\t" // i
2251 PREFETCH" (%%"REG_c") \n\t"
2252 PREFETCH" 32(%%"REG_c") \n\t"
2253 PREFETCH" 64(%%"REG_c") \n\t"
99cefd0b 2254
b63f641e 2255#if ARCH_X86_64
6d606c4f 2256
bcdedf67 2257#define CALL_MMX2_FILTER_CODE \
c255994b
RP
2258 "movl (%%"REG_b"), %%esi \n\t"\
2259 "call *%4 \n\t"\
2260 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
2261 "add %%"REG_S", %%"REG_c" \n\t"\
2262 "add %%"REG_a", %%"REG_D" \n\t"\
2263 "xor %%"REG_a", %%"REG_a" \n\t"\
6d606c4f
AJ
2264
2265#else
2266
bcdedf67 2267#define CALL_MMX2_FILTER_CODE \
c255994b
RP
2268 "movl (%%"REG_b"), %%esi \n\t"\
2269 "call *%4 \n\t"\
2270 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
2271 "add %%"REG_a", %%"REG_D" \n\t"\
2272 "xor %%"REG_a", %%"REG_a" \n\t"\
99cefd0b 2273
bc279024 2274#endif /* ARCH_X86_64 */
6d606c4f 2275
c255994b
RP
2276 CALL_MMX2_FILTER_CODE
2277 CALL_MMX2_FILTER_CODE
2278 CALL_MMX2_FILTER_CODE
2279 CALL_MMX2_FILTER_CODE
2280 CALL_MMX2_FILTER_CODE
2281 CALL_MMX2_FILTER_CODE
2282 CALL_MMX2_FILTER_CODE
2283 CALL_MMX2_FILTER_CODE
2ff198c1 2284
83c89c78 2285#if defined(PIC)
c255994b 2286 "mov %5, %%"REG_b" \n\t"
83c89c78 2287#endif
c255994b
RP
2288 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
2289 "m" (mmx2FilterCode)
83c89c78 2290#if defined(PIC)
c255994b 2291 ,"m" (ebxsave)
83c89c78 2292#endif
c255994b 2293 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
83c89c78 2294#if !defined(PIC)
c255994b 2295 ,"%"REG_b
2da0d70d
DB
2296#endif
2297 );
/* Overwrite the trailing outputs whose source index reaches the last
 * input sample with that sample scaled by 128. */
2298 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
dd68318c 2299 } else {
94daf2e9 2300#endif /* COMPILE_TEMPLATE_MMX2 */
d0ce212a 2301 x86_reg xInc_shr16 = xInc >> 16;
2da0d70d
DB
2302 uint16_t xInc_mask = xInc & 0xffff;
2303 //NO MMX just normal asm ...
/* Two output samples are produced per loop iteration. */
7ad6469e 2304 __asm__ volatile(
c255994b
RP
2305 "xor %%"REG_a", %%"REG_a" \n\t" // i
2306 "xor %%"REG_d", %%"REG_d" \n\t" // xx
2307 "xorl %%ecx, %%ecx \n\t" // xalpha
2308 ASMALIGN(4)
2309 "1: \n\t"
2310 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
2311 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
2312 FAST_BILINEAR_X86
2313 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
2314 "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
2315 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
2316
2317 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
2318 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
2319 FAST_BILINEAR_X86
2320 "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
2321 "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
2322 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
2323
2324
2325 "add $2, %%"REG_a" \n\t"
2326 "cmp %2, %%"REG_a" \n\t"
2327 " jb 1b \n\t"
2328
2329
2330 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
2331 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
2da0d70d 2332 );
94daf2e9 2333#if COMPILE_TEMPLATE_MMX2
2da0d70d 2334 } //if MMX2 can't be used
2ff198c1
MN
2335#endif
2336#else
40fa5140 2337 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
b63f641e 2338#endif /* ARCH_X86 */
077ea8a7 2339 }
6bc0c792 2340
dd68318c 2341 if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))) {
6bc0c792
MN
2342 int i;
2343 //FIXME all pal and rgb srcFormats could do this conversion as well
2344 //FIXME all scalers more complex than bilinear could do half of this transform
dd68318c 2345 if(c->srcRange) {
6bc0c792
MN
2346 for (i=0; i<dstWidth; i++)
2347 dst[i]= (dst[i]*14071 + 33561947)>>14;
dd68318c 2348 } else {
6bc0c792 2349 for (i=0; i<dstWidth; i++)
aa13b0fc 2350 dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
6bc0c792
MN
2351 }
2352 }
2ff198c1
MN
2353}
2354
392b6567 2355static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
7ac40327
RP
2356 int dstWidth, const uint8_t *src1,
2357 const uint8_t *src2, int srcW, int xInc)
392b6567
RP
2358{
2359 int i;
2360 unsigned int xpos=0;
dd68318c 2361 for (i=0;i<dstWidth;i++) {
392b6567
RP
2362 register unsigned int xx=xpos>>16;
2363 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2364 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2365 dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2366 /* slower
2367 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
2368 dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
2369 */
2370 xpos+=xInc;
2371 }
2372}
2373
7ac40327
RP
/**
 * Horizontally scale one pair of chroma lines into the 16-bit intermediate
 * buffer; the first plane goes to dst, the second to dst+VOFW.
 *
 * Steps, in order:
 *  - nothing is done for gray and monochrome source formats;
 *  - src1/src2 are offset for some packed formats (by ALT32_CORR for
 *    RGB32_1/BGR32_1, by 1 for RGB48LE);
 *  - if the context provides c->hcscale_internal, both lines are first
 *    converted into formatConvBuffer and formatConvBuffer+VOFW;
 *  - unless SWS_FAST_BILINEAR is requested (and, in MMX builds, unless
 *    canMMX2BeUsed is false) both lines are filtered through c->hScale;
 *    otherwise a fast bilinear path is used: run-time generated MMX2 code,
 *    plain x86 asm, or c->hcscale_fast;
 *  - when source and destination ranges differ and the destination is
 *    neither RGB nor BGR, both planes are range-converted.
 *
 * @param c                scaler context
 * @param dst              output buffer holding both chroma planes
 * @param dstWidth         number of output samples per plane
 * @param src1             input line of the first chroma plane
 * @param src2             input line of the second chroma plane
 * @param srcW             number of input samples per plane
 * @param xInc             16.16 fixed-point source step per output sample
 * @param flags            scaler flags (SWS_FAST_BILINEAR is tested here)
 * @param hChrFilter       horizontal filter coefficients
 * @param hChrFilterPos    horizontal filter positions
 * @param hChrFilterSize   horizontal filter tap count
 * @param srcFormat        source pixel format
 * @param formatConvBuffer scratch buffer for the input converter
 * @param pal              palette handed to the input converter
 */
2374inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
2375 int srcW, int xInc, int flags, const int16_t *hChrFilter,
2376 const int16_t *hChrFilterPos, int hChrFilterSize,
95b5770b
RP
2377 int srcFormat, uint8_t *formatConvBuffer,
2378 uint32_t *pal)
2ff198c1 2379{
fdf70cc5
RP
2380 int32_t av_unused *mmx2FilterPos = c->chrMmx2FilterPos;
2381 int16_t av_unused *mmx2Filter = c->chrMmx2Filter;
2382 int av_unused canMMX2BeUsed = c->canMMX2BeUsed;
bcdedf67 2383 void av_unused *mmx2FilterCode= c->chrMmx2FilterCode;
95b5770b 2384
40fa5140 2385 if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE)
2da0d70d 2386 return;
40fa5140 2387
f2671197 2388 if (srcFormat==PIX_FMT_RGB32_1 || srcFormat==PIX_FMT_BGR32_1) {
40fa5140
RP
2389 src1 += ALT32_CORR;
2390 src2 += ALT32_CORR;
6ff0ad6b 2391 }
40fa5140 2392
e8417235
KS
2393 if (srcFormat==PIX_FMT_RGB48LE) {
2394 src1++;
2395 src2++;
2396 }
2397
40fa5140
RP
2398 if (c->hcscale_internal) {
2399 c->hcscale_internal(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
2da0d70d 2400 src1= formatConvBuffer;
8b2fce0d 2401 src2= formatConvBuffer+VOFW;
e28630fc 2402 }
1e621b18 2403
94daf2e9 2404#if COMPILE_TEMPLATE_MMX
8a322796 2405 // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
2da0d70d 2406 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
e3d2500f 2407#else
2da0d70d 2408 if (!(flags&SWS_FAST_BILINEAR))
e3d2500f 2409#endif
077ea8a7 2410 {
40fa5140
RP
2411 c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
2412 c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
dd68318c 2413 } else { // fast bilinear upscale / crap downscale
57f9a560 2414#if ARCH_X86 && CONFIG_GPL
94daf2e9 2415#if COMPILE_TEMPLATE_MMX2
2da0d70d 2416 int i;
83c89c78 2417#if defined(PIC)
934626a9 2418 DECLARE_ALIGNED(8, uint64_t, ebxsave);
83c89c78 2419#endif
dd68318c 2420 if (canMMX2BeUsed) {
/* Calls the run-time generated filter code four times for src1, then
 * four times for src2 with the destination advanced by VOF bytes.
 * In PIC builds the b register is saved to ebxsave and restored by hand. */
7ad6469e 2421 __asm__ volatile(
83c89c78 2422#if defined(PIC)
c255994b 2423 "mov %%"REG_b", %6 \n\t"
2da0d70d 2424#endif
c255994b
RP
2425 "pxor %%mm7, %%mm7 \n\t"
2426 "mov %0, %%"REG_c" \n\t"
2427 "mov %1, %%"REG_D" \n\t"
2428 "mov %2, %%"REG_d" \n\t"
2429 "mov %3, %%"REG_b" \n\t"
2430 "xor %%"REG_a", %%"REG_a" \n\t" // i
2431 PREFETCH" (%%"REG_c") \n\t"
2432 PREFETCH" 32(%%"REG_c") \n\t"
2433 PREFETCH" 64(%%"REG_c") \n\t"
2434
2435 CALL_MMX2_FILTER_CODE
2436 CALL_MMX2_FILTER_CODE
2437 CALL_MMX2_FILTER_CODE
2438 CALL_MMX2_FILTER_CODE
2439 "xor %%"REG_a", %%"REG_a" \n\t" // i
2440 "mov %5, %%"REG_c" \n\t" // src
2441 "mov %1, %%"REG_D" \n\t" // buf1
2442 "add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t"
2443 PREFETCH" (%%"REG_c") \n\t"
2444 PREFETCH" 32(%%"REG_c") \n\t"
2445 PREFETCH" 64(%%"REG_c") \n\t"
2446
2447 CALL_MMX2_FILTER_CODE
2448 CALL_MMX2_FILTER_CODE
2449 CALL_MMX2_FILTER_CODE
2450 CALL_MMX2_FILTER_CODE
b7dc6f66 2451
83c89c78 2452#if defined(PIC)
c255994b 2453 "mov %6, %%"REG_b" \n\t"
83c89c78 2454#endif
c255994b
RP
2455 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
2456 "m" (mmx2FilterCode), "m" (src2)
83c89c78 2457#if defined(PIC)
c255994b 2458 ,"m" (ebxsave)
83c89c78 2459#endif
c255994b 2460 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
83c89c78 2461#if !defined(PIC)
c255994b 2462 ,"%"REG_b
2da0d70d
DB
2463#endif
2464 );
/* Overwrite the trailing outputs whose source index reaches the last
 * input sample with that sample scaled by 128. */
dd68318c 2465 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2da0d70d
DB
2466 //printf("%d %d %d\n", dstWidth, i, srcW);
2467 dst[i] = src1[srcW-1]*128;
8b2fce0d 2468 dst[i+VOFW] = src2[srcW-1]*128;
2da0d70d 2469 }
dd68318c 2470 } else {
94daf2e9 2471#endif /* COMPILE_TEMPLATE_MMX2 */
d0ce212a 2472 x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
2da0d70d 2473 uint16_t xInc_mask = xInc & 0xffff;
/* One sample of each plane is produced per loop iteration. */
7ad6469e 2474 __asm__ volatile(
c255994b
RP
2475 "xor %%"REG_a", %%"REG_a" \n\t" // i
2476 "xor %%"REG_d", %%"REG_d" \n\t" // xx
2477 "xorl %%ecx, %%ecx \n\t" // xalpha
2478 ASMALIGN(4)
2479 "1: \n\t"
2480 "mov %0, %%"REG_S" \n\t"
2481 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
2482 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
2483 FAST_BILINEAR_X86
2484 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
2485
2486 "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
2487 "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
2488 FAST_BILINEAR_X86
2489 "movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"
2490
2491 "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
2492 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
2493 "add $1, %%"REG_a" \n\t"
2494 "cmp %2, %%"REG_a" \n\t"
2495 " jb 1b \n\t"
2ff198c1 2496
8a322796
DB
2497/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
2498 which is needed to support GCC 4.0. */
b63f641e 2499#if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
c255994b 2500 :: "m" (src1), "m" (dst), "g" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
dc77ef7f 2501#else
c255994b 2502 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
dc77ef7f 2503#endif
c255994b
RP
2504 "r" (src2)
2505 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
2da0d70d 2506 );
94daf2e9 2507#if COMPILE_TEMPLATE_MMX2
2da0d70d 2508 } //if MMX2 can't be used
2ff198c1
MN
2509#endif
2510#else
40fa5140 2511 c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
b63f641e 2512#endif /* ARCH_X86 */
2da0d70d 2513 }
dd68318c 2514 if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))) {
6bc0c792
MN
2515 int i;
2516 //FIXME all pal and rgb srcFormats could do this conversion as well
2517 //FIXME all scalers more complex than bilinear could do half of this transform
dd68318c
RP
2518 if(c->srcRange) {
2519 for (i=0; i<dstWidth; i++) {
6bc0c792
MN
2520 dst[i ]= (dst[i ]*1799 + 4081085)>>11; //1469
2521 dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469
2522 }
dd68318c
RP
2523 } else {
2524 for (i=0; i<dstWidth; i++) {
aa13b0fc
MN
2525 dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
2526 dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
6bc0c792
MN
2527 }
2528 }
2529 }
077ea8a7
MN
2530}
2531
3e499f53 2532static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
dd68318c
RP
2533 int srcSliceH, uint8_t* dst[], int dstStride[])
2534{
2da0d70d
DB
2535 /* load a few things into local vars to make the code more readable? and faster */
2536 const int srcW= c->srcW;
2537 const int dstW= c->dstW;
2538 const int dstH= c->dstH;
2539 const int chrDstW= c->chrDstW;
2540 const int chrSrcW= c->chrSrcW;
2541 const int lumXInc= c->lumXInc;
2542 const int chrXInc= c->chrXInc;
2543 const int dstFormat= c->dstFormat;
2544 const int srcFormat= c->srcFormat;
2545 const int flags= c->flags;
2da0d70d
DB
2546 int16_t *vLumFilterPos= c->vLumFilterPos;
2547 int16_t *vChrFilterPos= c->vChrFilterPos;
2548 int16_t *hLumFilterPos= c->hLumFilterPos;
2549 int16_t *hChrFilterPos= c->hChrFilterPos;
2550 int16_t *vLumFilter= c->vLumFilter;
2551 int16_t *vChrFilter= c->vChrFilter;
2552 int16_t *hLumFilter= c->hLumFilter;
2553 int16_t *hChrFilter= c->hChrFilter;
2554 int32_t *lumMmxFilter= c->lumMmxFilter;
2555 int32_t *chrMmxFilter= c->chrMmxFilter;
6858492e 2556 int32_t *alpMmxFilter= c->alpMmxFilter;
2da0d70d
DB
2557 const int vLumFilterSize= c->vLumFilterSize;
2558 const int vChrFilterSize= c->vChrFilterSize;
2559 const int hLumFilterSize= c->hLumFilterSize;
2560 const int hChrFilterSize= c->hChrFilterSize;
2561 int16_t **lumPixBuf= c->lumPixBuf;
2562 int16_t **chrPixBuf= c->chrPixBuf;
6858492e 2563 int16_t **alpPixBuf= c->alpPixBuf;
2da0d70d
DB
2564 const int vLumBufSize= c->vLumBufSize;
2565 const int vChrBufSize= c->vChrBufSize;
2da0d70d
DB
2566 uint8_t *formatConvBuffer= c->formatConvBuffer;
2567 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2568 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2569 int lastDstY;
e150ef8d 2570 uint32_t *pal=c->pal_yuv;
2da0d70d 2571
8a322796 2572 /* vars which will change and which we need to store back in the context */
2da0d70d
DB
2573 int dstY= c->dstY;
2574 int lumBufIndex= c->lumBufIndex;
2575 int chrBufIndex= c->chrBufIndex;
2576 int lastInLumBuf= c->lastInLumBuf;
2577 int lastInChrBuf= c->lastInChrBuf;
2578
dd68318c 2579 if (isPacked(c->srcFormat)) {
2da0d70d
DB
2580 src[0]=
2581 src[1]=
6858492e
CS
2582 src[2]=
2583 src[3]= src[0];
2da0d70d
DB
2584 srcStride[0]=
2585 srcStride[1]=
6858492e
CS
2586 srcStride[2]=
2587 srcStride[3]= srcStride[0];
2da0d70d
DB
2588 }
2589 srcStride[1]<<= c->vChrDrop;
2590 srcStride[2]<<= c->vChrDrop;
2591
2592 //printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
2593 // (int)dst[0], (int)dst[1], (int)dst[2]);
c7a810cc 2594
2da0d70d
DB
2595 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
2596 //dstStride[0],dstStride[1],dstStride[2]);
2597
dd68318c 2598 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
6683a37f 2599 static int warnedAlready=0; //FIXME move this into the context perhaps
dd68318c 2600 if (flags & SWS_PRINT_INFO && !warnedAlready) {
4b0c30b7 2601 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
8a322796 2602 " ->cannot do aligned memory accesses anymore\n");
6683a37f 2603 warnedAlready=1;
2da0d70d
DB
2604 }
2605 }
2606
8a322796
DB
2607 /* Note the user might start scaling the picture in the middle so this
2608 will not get executed. This is not really intended but works
2609 currently, so people might do it. */
dd68318c 2610 if (srcSliceY ==0) {
2da0d70d
DB
2611 lumBufIndex=0;
2612 chrBufIndex=0;
2613 dstY=0;
2614 lastInLumBuf= -1;
2615 lastInChrBuf= -1;
2616 }
2617
2618 lastDstY= dstY;
2619
dd68318c 2620 for (;dstY < dstH; dstY++) {
2da0d70d
DB
2621 unsigned char *dest =dst[0]+dstStride[0]*dstY;
2622 const int chrDstY= dstY>>c->chrDstVSubSample;
2623 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
2624 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
6858492e 2625 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
2da0d70d
DB
2626
2627 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2628 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
fb91df39
RP
2629 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2630 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2631 int enough_lines;
2da0d70d
DB
2632
2633 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
2634 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
2635 //handle holes (FAST_BILINEAR & weird filters)
2636 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2637 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2638 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
fcc402b1
LB
2639 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2640 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2da0d70d
DB
2641
2642 // Do we have enough lines in this slice to output the dstY line
fb91df39
RP
2643 enough_lines = lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2644 if (!enough_lines) {
2645 lastLumSrcY = srcSliceY + srcSliceH - 1;
2646 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2647 }
2648
cbdc08d5
RP
2649 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
2650 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
2651 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
2652 vChrBufSize, vLumBufSize);*/
fb91df39 2653
cbdc08d5 2654 //Do horizontal scaling
dd68318c 2655 while(lastInLumBuf < lastLumSrcY) {
cbdc08d5
RP
2656 uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2657 uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2658 lumBufIndex++;
2659 //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
2660 assert(lumBufIndex < 2*vLumBufSize);
2661 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2662 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2663 //printf("%d %d\n", lumBufIndex, vLumBufSize);
2664 RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2665 flags, hLumFilter, hLumFilterPos, hLumFilterSize,
2666 c->srcFormat, formatConvBuffer,
2667 pal, 0);
2668 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2669 RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
95b5770b
RP
2670 flags, hLumFilter, hLumFilterPos, hLumFilterSize,
2671 c->srcFormat, formatConvBuffer,
cbdc08d5
RP
2672 pal, 1);
2673 lastInLumBuf++;
2674 }
dd68318c 2675 while(lastInChrBuf < lastChrSrcY) {
cbdc08d5
RP
2676 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2677 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2678 chrBufIndex++;
2679 assert(chrBufIndex < 2*vChrBufSize);
2680 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2681 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2682 //FIXME replace parameters through context struct (some at least)
2683
2684 if (!(isGray(srcFormat) || isGray(dstFormat)))
2685 RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
2686 flags, hChrFilter, hChrFilterPos, hChrFilterSize,
2687 c->srcFormat, formatConvBuffer,
2688 pal);
2689 lastInChrBuf++;
2690 }
2691 //wrap buf index around to stay inside the ring buffer
2692 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2693 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2694 if (!enough_lines)
2da0d70d 2695 break; //we can't output a dstY line so let's try with the next slice
d3f41512 2696
94daf2e9 2697#if COMPILE_TEMPLATE_MMX
88e2a9ae 2698 c->blueDither= ff_dither8[dstY&1];
92c7b471 2699 if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
88e2a9ae 2700 c->greenDither= ff_dither8[dstY&1];
92c7b471 2701 else
88e2a9ae
CEH
2702 c->greenDither= ff_dither4[dstY&1];
2703 c->redDither= ff_dither8[(dstY+1)&1];
2da0d70d 2704#endif
dd68318c 2705 if (dstY < dstH-2) {
7ac40327
RP
2706 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2707 const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2708 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
94daf2e9 2709#if COMPILE_TEMPLATE_MMX
2da0d70d 2710 int i;
dd68318c 2711 if (flags & SWS_ACCURATE_RND) {
9b734d44 2712 int s= APCK_SIZE / 8;
dd68318c 2713 for (i=0; i<vLumFilterSize; i+=2) {
9b734d44
RP
2714 *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
2715 *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
2716 lumMmxFilter[s*i+APCK_COEF/4 ]=
2717 lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
2718 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
dd68318c 2719 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
9b734d44
RP
2720 *(void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
2721 *(void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
2722 alpMmxFilter[s*i+APCK_COEF/4 ]=
2723 alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
2724 }
6858492e 2725 }
dd68318c 2726 for (i=0; i<vChrFilterSize; i+=2) {
9b734d44
RP
2727 *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
2728 *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
2729 chrMmxFilter[s*i+APCK_COEF/4 ]=
2730 chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
2731 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
2732 }
dd68318c
RP
2733 } else {
2734 for (i=0; i<vLumFilterSize; i++) {
9b734d44
RP
2735 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
2736 lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
2737 lumMmxFilter[4*i+2]=
2738 lumMmxFilter[4*i+3]=
2739 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
dd68318c 2740 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
9b734d44
RP
2741 alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
2742 alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
2743 alpMmxFilter[4*i+2]=
2744 alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
2745 }
2746 }
dd68318c 2747 for (i=0; i<vChrFilterSize; i++) {
9b734d44
RP
2748 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
2749 chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
2750 chrMmxFilter[4*i+2]=
2751 chrMmxFilter[4*i+3]=
2752 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
6858492e 2753 }
2da0d70d 2754 }
6542b44e 2755#endif
dd68318c 2756 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2da0d70d
DB
2757 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2758 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
40fa5140 2759 c->yuv2nv12X(c,
9b734d44
RP
2760 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2761 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2762 dest, uDest, dstW, chrDstW, dstFormat);
dd68318c 2763 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2da0d70d
DB
2764 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2765 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
dd68318c 2766 if (is16BPS(dstFormat)) {
52154148 2767 yuv2yuvX16inC(
9b734d44
RP
2768 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2769 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2770 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
2771 dstFormat);
dd68318c 2772 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2da0d70d
DB
2773 int16_t *lumBuf = lumPixBuf[0];
2774 int16_t *chrBuf= chrPixBuf[0];
6858492e 2775 int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
40fa5140 2776 c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
dd68318c 2777 } else { //General YV12
40fa5140 2778 c->yuv2yuvX(c,
9b734d44
RP
2779 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2780 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2781 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2da0d70d 2782 }
dd68318c 2783 } else {
fcc402b1
LB
2784 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2785 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
dd68318c 2786 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2da0d70d 2787 int chrAlpha= vChrFilter[2*dstY+1];
dd68318c 2788 if(flags & SWS_FULL_CHR_H_INT) {
f0faee4c 2789 yuv2rgbXinC_full(c, //FIXME write a packed1_full function
9b734d44
RP
2790 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2791 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2792 alpSrcPtr, dest, dstW, dstY);
dd68318c 2793 } else {
40fa5140 2794 c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
9b734d44
RP
2795 alpPixBuf ? *alpSrcPtr : NULL,
2796 dest, dstW, chrAlpha, dstFormat, flags, dstY);
f0faee4c 2797 }
dd68318c 2798 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2da0d70d
DB
2799 int lumAlpha= vLumFilter[2*dstY+1];
2800 int chrAlpha= vChrFilter[2*dstY+1];
2801 lumMmxFilter[2]=
2802 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
2803 chrMmxFilter[2]=
2804 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
dd68318c 2805 if(flags & SWS_FULL_CHR_H_INT) {
f0faee4c 2806 yuv2rgbXinC_full(c, //FIXME write a packed2_full function
9b734d44
RP
2807 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2808 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2809 alpSrcPtr, dest, dstW, dstY);
dd68318c 2810 } else {
40fa5140 2811 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
9b734d44
RP
2812 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
2813 dest, dstW, lumAlpha, chrAlpha, dstY);
f0faee4c 2814 }
dd68318c
RP
2815 } else { //general RGB
2816 if(flags & SWS_FULL_CHR_H_INT) {
f0faee4c 2817 yuv2rgbXinC_full(c,
9b734d44
RP
2818 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2819 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2820 alpSrcPtr, dest, dstW, dstY);
dd68318c 2821 } else {
40fa5140 2822 c->yuv2packedX(c,
9b734d44
RP
2823 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2824 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2825 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2826 }
2da0d70d
DB
2827 }
2828 }
dd68318c 2829 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
7ac40327
RP
2830 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2831 const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2832 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
dd68318c 2833 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2da0d70d
DB
2834 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2835 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
2836 yuv2nv12XinC(
9b734d44
RP
2837 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2838 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2839 dest, uDest, dstW, chrDstW, dstFormat);
dd68318c 2840 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
2da0d70d
DB
2841 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2842 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
dd68318c 2843 if (is16BPS(dstFormat)) {
52154148 2844 yuv2yuvX16inC(
9b734d44
RP
2845 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2846 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2847 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
2848 dstFormat);
dd68318c 2849 } else {
ebe5dec2 2850 yuv2yuvXinC(
9b734d44
RP
2851 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2852 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2853 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
52154148 2854 }
dd68318c 2855 } else {
fcc402b1
LB
2856 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2857 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
dd68318c 2858 if(flags & SWS_FULL_CHR_H_INT) {
f0faee4c 2859 yuv2rgbXinC_full(c,
9b734d44
RP
2860 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2861 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2862 alpSrcPtr, dest, dstW, dstY);
dd68318c 2863 } else {
14014d47 2864 yuv2packedXinC(c,
9b734d44
RP
2865 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2866 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2867 alpSrcPtr, dest, dstW, dstY);
f0faee4c 2868 }
2da0d70d
DB
2869 }
2870 }
2871 }
17f715fa 2872
6268f55b
CS
2873 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2874 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2875
94daf2e9 2876#if COMPILE_TEMPLATE_MMX
5b7c7dd3
RP
2877 if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory");
2878 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
2879 if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory");
2880 else __asm__ volatile("emms" :::"memory");
17f715fa 2881#endif
2da0d70d
DB
2882 /* store changed local vars back in the context */
2883 c->dstY= dstY;
2884 c->lumBufIndex= lumBufIndex;
2885 c->chrBufIndex= chrBufIndex;
2886 c->lastInLumBuf= lastInLumBuf;
2887 c->lastInChrBuf= lastInChrBuf;
d4e24275 2888
2da0d70d 2889 return dstY - lastDstY;
627690b5 2890}
40fa5140
RP
2891
2892static void RENAME(sws_init_swScale)(SwsContext *c)
2893{
2894 enum PixelFormat srcFormat = c->srcFormat;
2895
2896 c->yuv2nv12X = RENAME(yuv2nv12X );
2897 c->yuv2yuv1 = RENAME(yuv2yuv1 );
2898 c->yuv2yuvX = RENAME(yuv2yuvX );
2899 c->yuv2packed1 = RENAME(yuv2packed1 );
2900 c->yuv2packed2 = RENAME(yuv2packed2 );
2901 c->yuv2packedX = RENAME(yuv2packedX );
2902
2903 c->hScale = RENAME(hScale );
2904
2905 c->hyscale_fast = RENAME(hyscale_fast);
2906 c->hcscale_fast = RENAME(hcscale_fast);
2907
2908 c->hcscale_internal = NULL;
2909 switch(srcFormat) {
2910 case PIX_FMT_YUYV422 : c->hcscale_internal = RENAME(yuy2ToUV); break;
2911 case PIX_FMT_UYVY422 : c->hcscale_internal = RENAME(uyvyToUV); break;
2912 case PIX_FMT_RGB8 :
2913 case PIX_FMT_BGR8 :
2914 case PIX_FMT_PAL8 :
2915 case PIX_FMT_BGR4_BYTE:
80704c47 2916 case PIX_FMT_RGB4_BYTE: c->hcscale_internal = palToUV; break;
de1275d5
MN
2917 case PIX_FMT_YUV420PBE:
2918 case PIX_FMT_YUV422PBE:
2919 case PIX_FMT_YUV444PBE: c->hcscale_internal = RENAME(BEToUV); break;
2920 case PIX_FMT_YUV420PLE:
2921 case PIX_FMT_YUV422PLE:
2922 case PIX_FMT_YUV444PLE: c->hcscale_internal = RENAME(LEToUV); break;
40fa5140
RP
2923 }
2924 if (c->chrSrcHSubSample) {
2925 switch(srcFormat) {
e8417235
KS
2926 case PIX_FMT_RGB48BE:
2927 case PIX_FMT_RGB48LE: c->hcscale_internal = rgb48ToUV_half; break;
40fa5140 2928 case PIX_FMT_RGB32 :
80704c47 2929 case PIX_FMT_RGB32_1: c->hcscale_internal = bgr32ToUV_half; break;
40fa5140 2930 case PIX_FMT_BGR24 : c->hcscale_internal = RENAME(bgr24ToUV_half); break;
80704c47
KS
2931 case PIX_FMT_BGR565 : c->hcscale_internal = bgr16ToUV_half; break;
2932 case PIX_FMT_BGR555 : c->hcscale_internal = bgr15ToUV_half; break;
40fa5140 2933 case PIX_FMT_BGR32 :
80704c47 2934 case PIX_FMT_BGR32_1: c->hcscale_internal = rgb32ToUV_half; break;
40fa5140 2935 case PIX_FMT_RGB24 : c->hcscale_internal = RENAME(rgb24ToUV_half); break;
80704c47
KS
2936 case PIX_FMT_RGB565 : c->hcscale_internal = rgb16ToUV_half; break;
2937 case PIX_FMT_RGB555 : c->hcscale_internal = rgb15ToUV_half; break;
40fa5140
RP
2938 }
2939 } else {
2940 switch(srcFormat) {
e8417235
KS
2941 case PIX_FMT_RGB48BE:
2942 case PIX_FMT_RGB48LE: c->hcscale_internal = rgb48ToUV; break;
40fa5140 2943 case PIX_FMT_RGB32 :
80704c47 2944 case PIX_FMT_RGB32_1: c->hcscale_internal = bgr32ToUV; break;
40fa5140 2945 case PIX_FMT_BGR24 : c->hcscale_internal = RENAME(bgr24ToUV); break;
80704c47
KS
2946 case PIX_FMT_BGR565 : c->hcscale_internal = bgr16ToUV; break;
2947 case PIX_FMT_BGR555 : c->hcscale_internal = bgr15ToUV; break;
40fa5140 2948 case PIX_FMT_BGR32 :
80704c47 2949 case PIX_FMT_BGR32_1: c->hcscale_internal = rgb32ToUV; break;
40fa5140 2950 case PIX_FMT_RGB24 : c->hcscale_internal = RENAME(rgb24ToUV); break;
80704c47
KS
2951 case PIX_FMT_RGB565 : c->hcscale_internal = rgb16ToUV; break;
2952 case PIX_FMT_RGB555 : c->hcscale_internal = rgb15ToUV; break;
40fa5140
RP
2953 }
2954 }
2955
2956 c->hyscale_internal = NULL;
39e5f87b 2957 c->hascale_internal = NULL;
40fa5140
RP
2958 switch (srcFormat) {
2959 case PIX_FMT_YUYV422 :
de1275d5
MN
2960 case PIX_FMT_YUV420PBE:
2961 case PIX_FMT_YUV422PBE:
2962 case PIX_FMT_YUV444PBE:
40fa5140
RP
2963 case PIX_FMT_GRAY16BE : c->hyscale_internal = RENAME(yuy2ToY); break;
2964 case PIX_FMT_UYVY422 :
de1275d5
MN
2965 case PIX_FMT_YUV420PLE:
2966 case PIX_FMT_YUV422PLE:
2967 case PIX_FMT_YUV444PLE:
40fa5140
RP
2968 case PIX_FMT_GRAY16LE : c->hyscale_internal = RENAME(uyvyToY); break;
2969 case PIX_FMT_BGR24 : c->hyscale_internal = RENAME(bgr24ToY); break;
80704c47
KS
2970 case PIX_FMT_BGR565 : c->hyscale_internal = bgr16ToY; break;
2971 case PIX_FMT_BGR555 : c->hyscale_internal = bgr15ToY; break;
40fa5140 2972 case PIX_FMT_RGB24 : c->hyscale_internal = RENAME(rgb24ToY); break;
80704c47
KS
2973 case PIX_FMT_RGB565 : c->hyscale_internal = rgb16ToY; break;
2974 case PIX_FMT_RGB555 : c->hyscale_internal = rgb15ToY; break;
40fa5140
RP
2975 case PIX_FMT_RGB8 :
2976 case PIX_FMT_BGR8 :
2977 case PIX_FMT_PAL8 :
2978 case PIX_FMT_BGR4_BYTE:
80704c47
KS
2979 case PIX_FMT_RGB4_BYTE: c->hyscale_internal = palToY; break;
2980 case PIX_FMT_MONOBLACK: c->hyscale_internal = monoblack2Y; break;
2981 case PIX_FMT_MONOWHITE: c->hyscale_internal = monowhite2Y; break;
39e5f87b 2982 case PIX_FMT_RGB32 :
80704c47 2983 case PIX_FMT_RGB32_1: c->hyscale_internal = bgr32ToY; break;
39e5f87b 2984 case PIX_FMT_BGR32 :
80704c47 2985 case PIX_FMT_BGR32_1: c->hyscale_internal = rgb32ToY; break;
e8417235
KS
2986 case PIX_FMT_RGB48BE:
2987 case PIX_FMT_RGB48LE: c->hyscale_internal = rgb48ToY; break;
40fa5140
RP
2988 }
2989 if (c->alpPixBuf) {
2990 switch (srcFormat) {
2991 case PIX_FMT_RGB32 :
2992 case PIX_FMT_RGB32_1:
2993 case PIX_FMT_BGR32 :
80704c47 2994 case PIX_FMT_BGR32_1: c->hascale_internal = abgrToA; break;
40fa5140
RP
2995 }
2996 }
2997}