Convert two tables to signed decimal
[libav.git] / libswscale / swscale_template.c
CommitLineData
fe8054c0 1/*
d026b45e
DB
2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with FFmpeg; if not, write to the Free Software
b19bcbaa 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
d026b45e 19 *
8a322796
DB
20 * The C code (not assembly, MMX, ...) of this file can be used
21 * under the LGPL license.
d026b45e 22 */
783e9cc9 23
/*
 * Drop any earlier definitions of the helper macros that are (re)defined
 * just below (REAL_MOVNTQ, MOVNTQ, PAVGB, PREFETCH, PREFETCHW, EMMS, SFENCE).
 * NOTE(review): presumably required because this template is included more
 * than once with different HAVE_* settings -- confirm in the including file.
 */
6e1c66bc 24#undef REAL_MOVNTQ
541c4eb9 25#undef MOVNTQ
7d7f78b5 26#undef PAVGB
48a05cec
MN
27#undef PREFETCH
28#undef PREFETCHW
29#undef EMMS
30#undef SFENCE
31
/* EMMS: instruction mnemonic string, "femms" when HAVE_3DNOW is defined and
 * "emms" otherwise. */
32#ifdef HAVE_3DNOW
8a322796 33/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
48a05cec
MN
34#define EMMS "femms"
35#else
36#define EMMS "emms"
37#endif
38
/*
 * PREFETCH / PREFETCHW: prefetch mnemonic strings.
 *   HAVE_3DNOW : "prefetch"    / "prefetchw"
 *   HAVE_MMX2  : "prefetchnta" / "prefetcht0"
 *   otherwise  : " # nop" for both, so asm text built from these macros
 *                does not get a prefetch instruction at all.
 * HAVE_3DNOW takes precedence when both feature macros are defined.
 */
39#ifdef HAVE_3DNOW
40#define PREFETCH "prefetch"
41#define PREFETCHW "prefetchw"
e5091488 42#elif defined (HAVE_MMX2)
48a05cec
MN
43#define PREFETCH "prefetchnta"
44#define PREFETCHW "prefetcht0"
45#else
d904b5fc
NP
46#define PREFETCH " # nop"
47#define PREFETCHW " # nop"
48a05cec
MN
48#endif
49
/* SFENCE: "sfence" when HAVE_MMX2 is defined, otherwise the placeholder
 * text " # nop". */
50#ifdef HAVE_MMX2
51#define SFENCE "sfence"
52#else
d904b5fc 53#define SFENCE " # nop"
48a05cec 54#endif
d3f41512 55
d604bab9
MN
/*
 * PAVGB(a,b): one line of asm text for a byte-average instruction with the
 * operands stringified: "pavgb" under HAVE_MMX2, "pavgusb" under HAVE_3DNOW.
 * There is no #else branch, so PAVGB stays undefined when neither feature
 * macro is set; any use has to be guarded by the same conditions.
 */
56#ifdef HAVE_MMX2
57#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
58#elif defined (HAVE_3DNOW)
59#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
60#endif
d3f41512 61
/*
 * MOVNTQ(a,b): one line of asm text that stores a to b, using "movntq" when
 * HAVE_MMX2 is defined and a plain "movq" otherwise.
 * REAL_MOVNTQ does the stringification (#a, #b); MOVNTQ is the extra level of
 * indirection that lets macro arguments (e.g. the REGa in "%%REGa") be
 * expanded by the preprocessor before they are turned into strings.
 */
d604bab9 62#ifdef HAVE_MMX2
6e1c66bc 63#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
d604bab9 64#else
6e1c66bc 65#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
d604bab9 66#endif
6e1c66bc 67#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
d604bab9 68
a2faa401
RD
69#ifdef HAVE_ALTIVEC
70#include "swscale_altivec_template.c"
71#endif
72
/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * Expands to one complete asm statement that produces 8 output bytes per
 * outer pass by summing pmulhw products over a list of source rows.
 *
 *   x      - string literal used as displacement for the source loads
 *   offset - string literal: displacement from %0 to the filter list
 *   dest   - output pointer (operand %1)
 *   width  - loop bound (operand %2); the outer loop runs while
 *            REG_a < width (unsigned compare, "jb")
 *   %0     - &c->redDither, so a variable named c must be in scope
 *
 * The filter list is walked in 16-byte steps: a source pointer at +0 and the
 * coefficients at +8. The walk stops when the next source pointer is NULL.
 * Both accumulators (mm3/mm4) start from the value at VROUNDER_OFFSET(%0);
 * the sums are shifted right by 3 and packed with unsigned saturation.
 *
 * The single label "1:" is the target of both the inner "jnz 1b" and the
 * outer "jb 1b"; the outer path re-initialises mm3, mm4, REG_d and REG_S
 * before it jumps.
 *
 * NOTE(review): the clobber list names only REG_a, REG_d and REG_S; the MMX
 * registers used and the store through %1 are not declared.
 */
bca11e75 73#define YSCALEYUV2YV12X(x, offset, dest, width) \
2da0d70d
DB
74 asm volatile(\
75 "xor %%"REG_a", %%"REG_a" \n\t"\
76 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
77 "movq %%mm3, %%mm4 \n\t"\
78 "lea " offset "(%0), %%"REG_d" \n\t"\
79 "mov (%%"REG_d"), %%"REG_S" \n\t"\
80 ASMALIGN(4) /* FIXME Unroll? */\
81 "1: \n\t"\
82 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8b2fce0d
MN
83 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
84 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* srcData */\
2da0d70d
DB
85 "add $16, %%"REG_d" \n\t"\
86 "mov (%%"REG_d"), %%"REG_S" \n\t"\
87 "test %%"REG_S", %%"REG_S" \n\t"\
88 "pmulhw %%mm0, %%mm2 \n\t"\
89 "pmulhw %%mm0, %%mm5 \n\t"\
90 "paddw %%mm2, %%mm3 \n\t"\
91 "paddw %%mm5, %%mm4 \n\t"\
92 " jnz 1b \n\t"\
93 "psraw $3, %%mm3 \n\t"\
94 "psraw $3, %%mm4 \n\t"\
95 "packuswb %%mm4, %%mm3 \n\t"\
96 MOVNTQ(%%mm3, (%1, %%REGa))\
97 "add $8, %%"REG_a" \n\t"\
98 "cmp %2, %%"REG_a" \n\t"\
99 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
100 "movq %%mm3, %%mm4 \n\t"\
101 "lea " offset "(%0), %%"REG_d" \n\t"\
102 "mov (%%"REG_d"), %%"REG_S" \n\t"\
103 "jb 1b \n\t"\
104 :: "r" (&c->redDither),\
105 "r" (dest), "g" (width)\
106 : "%"REG_a, "%"REG_d, "%"REG_S\
107 );
bca11e75
MN
108
/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same parameters and operands as YSCALEYUV2YV12X (%0 = &c->redDither,
 * %1 = dest, %2 = width), but the products are accumulated in 32 bits.
 *
 * Each filter-list entry is APCK_SIZE bytes and is consumed as a pair of
 * source rows: first row pointer at +0, second row pointer at +APCK_PTR2,
 * coefficients at +APCK_COEF. The two rows are interleaved word by word
 * (punpcklwd/punpckhwd) and multiplied with pmaddwd into the dword
 * accumulators mm4..mm7. The walk ends when the pointer read from the next
 * entry (+APCK_SIZE) is NULL.
 *
 * After the walk: >>16, pack to words, add the value at VROUNDER_OFFSET(%0),
 * >>3, pack to bytes with unsigned saturation, store 8 bytes at dest+REG_a.
 * The outer loop runs while REG_a < width (unsigned compare).
 *
 * As in YSCALEYUV2YV12X, label "1:" serves both loops and only REG_a, REG_d
 * and REG_S are listed as clobbers.
 */
109#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
2da0d70d
DB
110 asm volatile(\
111 "lea " offset "(%0), %%"REG_d" \n\t"\
112 "xor %%"REG_a", %%"REG_a" \n\t"\
113 "pxor %%mm4, %%mm4 \n\t"\
114 "pxor %%mm5, %%mm5 \n\t"\
115 "pxor %%mm6, %%mm6 \n\t"\
116 "pxor %%mm7, %%mm7 \n\t"\
117 "mov (%%"REG_d"), %%"REG_S" \n\t"\
118 ASMALIGN(4) \
119 "1: \n\t"\
8b2fce0d
MN
120 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
121 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
1625216e 122 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
8b2fce0d 123 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
2da0d70d
DB
124 "movq %%mm0, %%mm3 \n\t"\
125 "punpcklwd %%mm1, %%mm0 \n\t"\
126 "punpckhwd %%mm1, %%mm3 \n\t"\
1625216e 127 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
2da0d70d
DB
128 "pmaddwd %%mm1, %%mm0 \n\t"\
129 "pmaddwd %%mm1, %%mm3 \n\t"\
130 "paddd %%mm0, %%mm4 \n\t"\
131 "paddd %%mm3, %%mm5 \n\t"\
8b2fce0d 132 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
1625216e
MN
133 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
134 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
2da0d70d
DB
135 "test %%"REG_S", %%"REG_S" \n\t"\
136 "movq %%mm2, %%mm0 \n\t"\
137 "punpcklwd %%mm3, %%mm2 \n\t"\
138 "punpckhwd %%mm3, %%mm0 \n\t"\
139 "pmaddwd %%mm1, %%mm2 \n\t"\
140 "pmaddwd %%mm1, %%mm0 \n\t"\
141 "paddd %%mm2, %%mm6 \n\t"\
142 "paddd %%mm0, %%mm7 \n\t"\
143 " jnz 1b \n\t"\
144 "psrad $16, %%mm4 \n\t"\
145 "psrad $16, %%mm5 \n\t"\
146 "psrad $16, %%mm6 \n\t"\
147 "psrad $16, %%mm7 \n\t"\
148 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
149 "packssdw %%mm5, %%mm4 \n\t"\
150 "packssdw %%mm7, %%mm6 \n\t"\
151 "paddw %%mm0, %%mm4 \n\t"\
152 "paddw %%mm0, %%mm6 \n\t"\
153 "psraw $3, %%mm4 \n\t"\
154 "psraw $3, %%mm6 \n\t"\
155 "packuswb %%mm6, %%mm4 \n\t"\
156 MOVNTQ(%%mm4, (%1, %%REGa))\
157 "add $8, %%"REG_a" \n\t"\
158 "cmp %2, %%"REG_a" \n\t"\
159 "lea " offset "(%0), %%"REG_d" \n\t"\
160 "pxor %%mm4, %%mm4 \n\t"\
161 "pxor %%mm5, %%mm5 \n\t"\
162 "pxor %%mm6, %%mm6 \n\t"\
163 "pxor %%mm7, %%mm7 \n\t"\
164 "mov (%%"REG_d"), %%"REG_S" \n\t"\
165 "jb 1b \n\t"\
166 :: "r" (&c->redDither),\
167 "r" (dest), "g" (width)\
168 : "%"REG_a, "%"REG_d, "%"REG_S\
169 );
c1b0bfb4
MN
170
/*
 * YSCALEYUV2YV121
 *
 * Asm text only (no "asm volatile(" wrapper and no operand list); the user
 * of the macro supplies both. Operands referenced: %0 = 16-bit source,
 * %1 = byte destination, %2 = initial value of the index register REG_a.
 *
 * Each pass loads 8 words from (%0 + 2*REG_a), shifts them right by 7
 * (arithmetic), packs them to 8 bytes with unsigned saturation and stores
 * them at (%1 + REG_a).
 *
 * The loop ends when "add $8" produces a carry ("jnc 1b").
 * NOTE(review): this implies %2 is a negative count and that %0/%1 point at
 * the end of their buffers -- confirm against the call site.
 */
171#define YSCALEYUV2YV121 \
2da0d70d
DB
172 "mov %2, %%"REG_a" \n\t"\
173 ASMALIGN(4) /* FIXME Unroll? */\
174 "1: \n\t"\
175 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
176 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
177 "psraw $7, %%mm0 \n\t"\
178 "psraw $7, %%mm1 \n\t"\
179 "packuswb %%mm1, %%mm0 \n\t"\
180 MOVNTQ(%%mm0, (%1, %%REGa))\
181 "add $8, %%"REG_a" \n\t"\
182 "jnc 1b \n\t"
c1b0bfb4 183
bf2bdde6
MN
/*
 * YSCALEYUV2YV121_ACCURATE
 *
 * Same operands and loop structure as YSCALEYUV2YV121, with rounding added:
 * mm7 is built as 64 in every word (all-ones, >>15 logical gives 1, <<6
 * gives 64) and is added to each sample before the arithmetic >>7.
 *
 * Like YSCALEYUV2YV121 this is asm text only; the loop ends when
 * "add $8" on REG_a produces a carry.
 */
184#define YSCALEYUV2YV121_ACCURATE \
185 "mov %2, %%"REG_a" \n\t"\
186 "pcmpeqw %%mm7, %%mm7 \n\t"\
187 "psrlw $15, %%mm7 \n\t"\
188 "psllw $6, %%mm7 \n\t"\
189 ASMALIGN(4) /* FIXME Unroll? */\
190 "1: \n\t"\
191 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
192 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
193 "paddw %%mm7, %%mm0 \n\t"\
194 "paddw %%mm7, %%mm1 \n\t"\
195 "psraw $7, %%mm0 \n\t"\
196 "psraw $7, %%mm1 \n\t"\
197 "packuswb %%mm1, %%mm0 \n\t"\
198 MOVNTQ(%%mm0, (%1, %%REGa))\
199 "add $8, %%"REG_a" \n\t"\
200 "jnc 1b \n\t"
201
c1b0bfb4 202/*
2da0d70d
DB
203 :: "m" (-lumFilterSize), "m" (-chrFilterSize),
204 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
205 "r" (dest), "m" (dstW),
206 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
207 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
c1b0bfb4 208*/
/*
 * YSCALEYUV2PACKEDX
 *
 * Opens an asm statement ("asm volatile(") and emits the vertical filter for
 * one group of pixels. The statement is left open: more asm text (a
 * colour-conversion and/or writer macro) and finally YSCALEYUV2PACKEDX_END
 * must follow to supply the operand list and the closing ");".
 *
 * Operand %0 is the base for CHR_MMX_FILTER_OFFSET, LUM_MMX_FILTER_OFFSET and
 * VROUNDER_OFFSET. REG_a is the pixel index, zeroed here; label "1:" is the
 * head of the per-group loop and is not closed by this macro.
 *
 * Two inner loops (both labelled "2:") walk the chroma and then the luma
 * filter list in 16-byte steps (pointer at +0, coefficients at +8) until the
 * next pointer is NULL, accumulating pmulhw products on top of the value at
 * VROUNDER_OFFSET(%0).
 *
 * Register state on exit:
 *   mm3 = U sum, mm4 = V sum   (4 words each, V read at displacement VOF)
 *   mm1 = Y sum for the first 4 words, mm7 = Y sum for the next 4 words
 */
25593e29 209#define YSCALEYUV2PACKEDX \
2da0d70d
DB
210 asm volatile(\
211 "xor %%"REG_a", %%"REG_a" \n\t"\
212 ASMALIGN(4)\
213 "nop \n\t"\
214 "1: \n\t"\
215 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
216 "mov (%%"REG_d"), %%"REG_S" \n\t"\
217 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
218 "movq %%mm3, %%mm4 \n\t"\
219 ASMALIGN(4)\
220 "2: \n\t"\
221 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
222 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
8b2fce0d 223 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
2da0d70d
DB
224 "add $16, %%"REG_d" \n\t"\
225 "mov (%%"REG_d"), %%"REG_S" \n\t"\
226 "pmulhw %%mm0, %%mm2 \n\t"\
227 "pmulhw %%mm0, %%mm5 \n\t"\
228 "paddw %%mm2, %%mm3 \n\t"\
229 "paddw %%mm5, %%mm4 \n\t"\
230 "test %%"REG_S", %%"REG_S" \n\t"\
231 " jnz 2b \n\t"\
c1b0bfb4 232\
2da0d70d
DB
233 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
234 "mov (%%"REG_d"), %%"REG_S" \n\t"\
235 "movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\
236 "movq %%mm1, %%mm7 \n\t"\
237 ASMALIGN(4)\
238 "2: \n\t"\
239 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
240 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
241 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
242 "add $16, %%"REG_d" \n\t"\
243 "mov (%%"REG_d"), %%"REG_S" \n\t"\
244 "pmulhw %%mm0, %%mm2 \n\t"\
245 "pmulhw %%mm0, %%mm5 \n\t"\
246 "paddw %%mm2, %%mm1 \n\t"\
247 "paddw %%mm5, %%mm7 \n\t"\
248 "test %%"REG_S", %%"REG_S" \n\t"\
249 " jnz 2b \n\t"\
250
/*
 * YSCALEYUV2PACKEDX_END
 *
 * Operand list and terminator for an asm statement opened by
 * YSCALEYUV2PACKEDX or YSCALEYUV2PACKEDX_ACCURATE. No outputs are declared.
 * Inputs:
 *   %0       = &c->redDither (register)
 *   %1 .. %3 = dummy (memory; three placeholders)
 *   %4       = dest (register)
 *   %5       = dstW (memory)
 * Clobbers: REG_a, REG_d, REG_S.
 * The names c, dummy, dest and dstW must be in scope where this expands.
 */
251#define YSCALEYUV2PACKEDX_END \
252 :: "r" (&c->redDither), \
253 "m" (dummy), "m" (dummy), "m" (dummy),\
254 "r" (dest), "m" (dstW) \
255 : "%"REG_a, "%"REG_d, "%"REG_S \
256 );
8422aa88 257
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * 32-bit-accumulating counterpart of YSCALEYUV2PACKEDX. Like that macro it
 * opens "asm volatile(" and the per-group loop label "1:" and leaves both
 * open; YSCALEYUV2PACKEDX_END must close the statement.
 *
 * Filter-list entries are APCK_SIZE bytes and are consumed as row pairs:
 * first pointer at +0, second pointer at +APCK_PTR2, coefficients at
 * +APCK_COEF. Rows are interleaved with punpck{l,h}wd and multiplied with
 * pmaddwd. Each walk ends when the pointer read at +APCK_SIZE is NULL.
 *
 * Chroma pass: sums in mm4..mm7, >>16, packed to words, value at
 * VROUNDER_OFFSET(%0) added, then spilled to U_TEMP(%0) / V_TEMP(%0), because
 * the luma pass reuses the same registers.
 * Luma pass: sums in mm1/mm5 and mm7/mm6, >>16, packed, rounder added.
 *
 * Register state on exit (same layout as YSCALEYUV2PACKEDX):
 *   mm1 = Y1, mm7 = Y2, mm3 = U (reloaded from U_TEMP), mm4 = V (from V_TEMP)
 */
bca11e75 258#define YSCALEYUV2PACKEDX_ACCURATE \
2da0d70d
DB
259 asm volatile(\
260 "xor %%"REG_a", %%"REG_a" \n\t"\
261 ASMALIGN(4)\
262 "nop \n\t"\
263 "1: \n\t"\
264 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
265 "mov (%%"REG_d"), %%"REG_S" \n\t"\
266 "pxor %%mm4, %%mm4 \n\t"\
267 "pxor %%mm5, %%mm5 \n\t"\
268 "pxor %%mm6, %%mm6 \n\t"\
269 "pxor %%mm7, %%mm7 \n\t"\
270 ASMALIGN(4)\
271 "2: \n\t"\
272 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
8b2fce0d 273 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
1625216e 274 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
2da0d70d
DB
275 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
276 "movq %%mm0, %%mm3 \n\t"\
277 "punpcklwd %%mm1, %%mm0 \n\t"\
278 "punpckhwd %%mm1, %%mm3 \n\t"\
1625216e 279 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
2da0d70d
DB
280 "pmaddwd %%mm1, %%mm0 \n\t"\
281 "pmaddwd %%mm1, %%mm3 \n\t"\
282 "paddd %%mm0, %%mm4 \n\t"\
283 "paddd %%mm3, %%mm5 \n\t"\
8b2fce0d 284 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
1625216e
MN
285 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
286 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
2da0d70d
DB
287 "test %%"REG_S", %%"REG_S" \n\t"\
288 "movq %%mm2, %%mm0 \n\t"\
289 "punpcklwd %%mm3, %%mm2 \n\t"\
290 "punpckhwd %%mm3, %%mm0 \n\t"\
291 "pmaddwd %%mm1, %%mm2 \n\t"\
292 "pmaddwd %%mm1, %%mm0 \n\t"\
293 "paddd %%mm2, %%mm6 \n\t"\
294 "paddd %%mm0, %%mm7 \n\t"\
295 " jnz 2b \n\t"\
296 "psrad $16, %%mm4 \n\t"\
297 "psrad $16, %%mm5 \n\t"\
298 "psrad $16, %%mm6 \n\t"\
299 "psrad $16, %%mm7 \n\t"\
300 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
301 "packssdw %%mm5, %%mm4 \n\t"\
302 "packssdw %%mm7, %%mm6 \n\t"\
303 "paddw %%mm0, %%mm4 \n\t"\
304 "paddw %%mm0, %%mm6 \n\t"\
305 "movq %%mm4, "U_TEMP"(%0) \n\t"\
306 "movq %%mm6, "V_TEMP"(%0) \n\t"\
bca11e75 307\
2da0d70d
DB
308 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
309 "mov (%%"REG_d"), %%"REG_S" \n\t"\
310 "pxor %%mm1, %%mm1 \n\t"\
311 "pxor %%mm5, %%mm5 \n\t"\
312 "pxor %%mm7, %%mm7 \n\t"\
313 "pxor %%mm6, %%mm6 \n\t"\
314 ASMALIGN(4)\
315 "2: \n\t"\
316 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
317 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
1625216e 318 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
2da0d70d
DB
319 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
320 "movq %%mm0, %%mm3 \n\t"\
321 "punpcklwd %%mm4, %%mm0 \n\t"\
322 "punpckhwd %%mm4, %%mm3 \n\t"\
1625216e 323 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
2da0d70d
DB
324 "pmaddwd %%mm4, %%mm0 \n\t"\
325 "pmaddwd %%mm4, %%mm3 \n\t"\
326 "paddd %%mm0, %%mm1 \n\t"\
327 "paddd %%mm3, %%mm5 \n\t"\
328 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
1625216e
MN
329 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
330 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
2da0d70d
DB
331 "test %%"REG_S", %%"REG_S" \n\t"\
332 "movq %%mm2, %%mm0 \n\t"\
333 "punpcklwd %%mm3, %%mm2 \n\t"\
334 "punpckhwd %%mm3, %%mm0 \n\t"\
335 "pmaddwd %%mm4, %%mm2 \n\t"\
336 "pmaddwd %%mm4, %%mm0 \n\t"\
337 "paddd %%mm2, %%mm7 \n\t"\
338 "paddd %%mm0, %%mm6 \n\t"\
339 " jnz 2b \n\t"\
340 "psrad $16, %%mm1 \n\t"\
341 "psrad $16, %%mm5 \n\t"\
342 "psrad $16, %%mm7 \n\t"\
343 "psrad $16, %%mm6 \n\t"\
344 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
345 "packssdw %%mm5, %%mm1 \n\t"\
346 "packssdw %%mm6, %%mm7 \n\t"\
347 "paddw %%mm0, %%mm1 \n\t"\
348 "paddw %%mm0, %%mm7 \n\t"\
349 "movq "U_TEMP"(%0), %%mm3 \n\t"\
350 "movq "V_TEMP"(%0), %%mm4 \n\t"\
bca11e75 351
/*
 * YSCALEYUV2RGBX
 *
 * Asm text that converts the YUV words left by YSCALEYUV2PACKEDX[_ACCURATE]
 * into packed R/G/B bytes. All constants are read relative to operand %0
 * (U_OFFSET, V_OFFSET, Y_OFFSET, UG/VG/UB/VR/Y_COEFF).
 *
 * In : mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (words)
 * Out: mm2 = B, mm4 = G, mm5 = R (8 bytes each, unsigned saturation),
 *      mm7 = 0
 * Each chroma term is duplicated with punpck{l,h}wd so one chroma word is
 * applied to two neighbouring luma words. mm0, mm3 and mm6 are scratch.
 */
8422aa88 352#define YSCALEYUV2RGBX \
2da0d70d
DB
353 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
354 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
355 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
356 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
357 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
358 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
359/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
360 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
361 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
362 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
363 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
364 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
365 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
366/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
367 "paddw %%mm3, %%mm4 \n\t"\
368 "movq %%mm2, %%mm0 \n\t"\
369 "movq %%mm5, %%mm6 \n\t"\
370 "movq %%mm4, %%mm3 \n\t"\
371 "punpcklwd %%mm2, %%mm2 \n\t"\
372 "punpcklwd %%mm5, %%mm5 \n\t"\
373 "punpcklwd %%mm4, %%mm4 \n\t"\
374 "paddw %%mm1, %%mm2 \n\t"\
375 "paddw %%mm1, %%mm5 \n\t"\
376 "paddw %%mm1, %%mm4 \n\t"\
377 "punpckhwd %%mm0, %%mm0 \n\t"\
378 "punpckhwd %%mm6, %%mm6 \n\t"\
379 "punpckhwd %%mm3, %%mm3 \n\t"\
380 "paddw %%mm7, %%mm0 \n\t"\
381 "paddw %%mm7, %%mm6 \n\t"\
382 "paddw %%mm7, %%mm3 \n\t"\
383 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
384 "packuswb %%mm0, %%mm2 \n\t"\
385 "packuswb %%mm6, %%mm5 \n\t"\
386 "packuswb %%mm3, %%mm4 \n\t"\
387 "pxor %%mm7, %%mm7 \n\t"
/*
 * FULL_YSCALEYUV2RGB is wrapped in "#if 0" and therefore never defined.
 * As written it would blend two luma rows (%0, %1) and two chroma rows
 * (%2, %3) with the weights in %6 / %7 and convert the result using the
 * global constants w80, w400, yCoeff, ubCoeff, ugCoeff, vrCoeff and vgCoeff
 * (referenced through MANGLE()).
 */
77a49659 388#if 0
d604bab9 389#define FULL_YSCALEYUV2RGB \
2da0d70d
DB
390 "pxor %%mm7, %%mm7 \n\t"\
391 "movd %6, %%mm6 \n\t" /*yalpha1*/\
392 "punpcklwd %%mm6, %%mm6 \n\t"\
393 "punpcklwd %%mm6, %%mm6 \n\t"\
394 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
395 "punpcklwd %%mm5, %%mm5 \n\t"\
396 "punpcklwd %%mm5, %%mm5 \n\t"\
397 "xor %%"REG_a", %%"REG_a" \n\t"\
398 ASMALIGN(4)\
399 "1: \n\t"\
400 "movq (%0, %%"REG_a",2), %%mm0 \n\t" /*buf0[eax]*/\
401 "movq (%1, %%"REG_a",2), %%mm1 \n\t" /*buf1[eax]*/\
402 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
403 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
404 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
405 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
406 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
407 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
408 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8b2fce0d 409 "movq "AV_STRINGIFY(VOF)"(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
2da0d70d
DB
410 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
411 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8b2fce0d 412 "movq "AV_STRINGIFY(VOF)"(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
413 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
414 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
415 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
416 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
417 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
d604bab9
MN
418\
419\
2da0d70d
DB
420 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
421 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
422 "pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\
423 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
424 "pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\
425 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
426 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
d604bab9
MN
427\
428\
2da0d70d
DB
429 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
430 "pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\
431 "pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\
432 "paddw %%mm1, %%mm3 \n\t" /* B*/\
433 "paddw %%mm1, %%mm0 \n\t" /* R*/\
434 "packuswb %%mm3, %%mm3 \n\t"\
d604bab9 435\
2da0d70d
DB
436 "packuswb %%mm0, %%mm0 \n\t"\
437 "paddw %%mm4, %%mm2 \n\t"\
438 "paddw %%mm2, %%mm1 \n\t" /* G*/\
d604bab9 439\
2da0d70d 440 "packuswb %%mm1, %%mm1 \n\t"
77a49659 441#endif
d604bab9 442
/*
 * REAL_YSCALEYUV2PACKED(index, c) / YSCALEYUV2PACKED(index, c)
 *
 * Asm text that blends two luma rows (%0 = buf0, %1 = buf1) and two chroma
 * rows (%2 = uvbuf0, %3 = uvbuf1) into YUV words for a packed-YUV writer.
 *   index - register operand text used as the loop counter (zeroed here)
 *   c     - register operand text holding the base address for the
 *           CHR_/LUM_MMX_FILTER_OFFSET fields
 *
 * Blend form per word: (row1 >> 7) + pmulhw(row0 - row1, weight).
 *
 * NOTE(review): before the loop the 8-byte values at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are shifted right
 * by 3 and written back to memory, so every execution modifies the data
 * that c points to.
 *
 * The loop label "1:" is opened here but not closed; the text that follows
 * has to supply the increment and the backward jump.
 * Out: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (words).
 *
 * YSCALEYUV2PACKED is the indirection that lets index and c be
 * macro-expanded before REAL_YSCALEYUV2PACKED stringifies them.
 */
6e1c66bc 443#define REAL_YSCALEYUV2PACKED(index, c) \
2da0d70d
DB
444 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
445 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
446 "psraw $3, %%mm0 \n\t"\
447 "psraw $3, %%mm1 \n\t"\
448 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
449 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
450 "xor "#index", "#index" \n\t"\
451 ASMALIGN(4)\
452 "1: \n\t"\
453 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
454 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
455 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
456 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
457 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
458 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
459 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
460 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
461 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
462 "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
463 "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7*/\
464 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
465 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
466 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
467 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
468 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
469 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
470 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
471 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
472 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
473 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
474 "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
475 "psraw $7, %%mm7 \n\t" /* buf1[eax+4] >>7*/\
476 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
477 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
6a4970ab 478
6e1c66bc 479#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
6a4970ab 480
/*
 * REAL_YSCALEYUV2RGB(index, c) / YSCALEYUV2RGB(index, c)
 *
 * Asm text that blends two luma rows (%0 = buf0, %1 = buf1) and two chroma
 * rows (%2 = uvbuf0, %3 = uvbuf1) and converts the result to R/G/B bytes.
 *   index - register operand text used as the loop counter (zeroed here)
 *   c     - register operand text: base for the filter weights
 *           (CHR_/LUM_MMX_FILTER_OFFSET+8) and the conversion constants
 *           (U/V/Y_OFFSET, UG/VG/UB/VR/Y_COEFF)
 *
 * Blend form per word: (row1 >> 4) + pmulhw(row0 - row1, weight).
 * The loop label "1:" is opened here and left for a WRITE* macro to close.
 *
 * Out: mm2 = B, mm4 = G, mm5 = R (8 bytes each, unsigned saturation),
 *      mm7 = 0.
 *
 * YSCALEYUV2RGB is the indirection that lets index and c be macro-expanded
 * before they are stringified.
 */
6e1c66bc 481#define REAL_YSCALEYUV2RGB(index, c) \
2da0d70d
DB
482 "xor "#index", "#index" \n\t"\
483 ASMALIGN(4)\
484 "1: \n\t"\
485 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
486 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
487 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
488 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
489 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
490 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
491 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
492 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
493 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
494 "psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4*/\
495 "psraw $4, %%mm4 \n\t" /* uvbuf1[eax+2048] >>4*/\
496 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
497 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
498 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
499 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
500 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
501 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
502 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
503 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
504 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
505 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
506 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
507 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
508 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
509 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
510 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
511 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
512 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
513 "psraw $4, %%mm1 \n\t" /* buf1[eax] >>4*/\
514 "psraw $4, %%mm7 \n\t" /* buf1[eax+4] >>4*/\
515 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
516 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
517 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
518 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
519 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
520 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
521 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
522 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
523 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
524 "paddw %%mm3, %%mm4 \n\t"\
525 "movq %%mm2, %%mm0 \n\t"\
526 "movq %%mm5, %%mm6 \n\t"\
527 "movq %%mm4, %%mm3 \n\t"\
528 "punpcklwd %%mm2, %%mm2 \n\t"\
529 "punpcklwd %%mm5, %%mm5 \n\t"\
530 "punpcklwd %%mm4, %%mm4 \n\t"\
531 "paddw %%mm1, %%mm2 \n\t"\
532 "paddw %%mm1, %%mm5 \n\t"\
533 "paddw %%mm1, %%mm4 \n\t"\
534 "punpckhwd %%mm0, %%mm0 \n\t"\
535 "punpckhwd %%mm6, %%mm6 \n\t"\
536 "punpckhwd %%mm3, %%mm3 \n\t"\
537 "paddw %%mm7, %%mm0 \n\t"\
538 "paddw %%mm7, %%mm6 \n\t"\
539 "paddw %%mm7, %%mm3 \n\t"\
540 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
541 "packuswb %%mm0, %%mm2 \n\t"\
542 "packuswb %%mm6, %%mm5 \n\t"\
543 "packuswb %%mm3, %%mm4 \n\t"\
544 "pxor %%mm7, %%mm7 \n\t"
6e1c66bc 545#define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)
6a4970ab 546
/*
 * REAL_YSCALEYUV2PACKED1(index, c) / YSCALEYUV2PACKED1(index, c)
 *
 * Single-row variant of YSCALEYUV2PACKED: no blending, every sample is just
 * shifted right by 7 (arithmetic). Reads luma from %0 and chroma from %2
 * only; the parameter c is accepted but not used in the expansion.
 * Opens loop label "1:" (index zeroed first) and leaves it open.
 * Out: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (words).
 */
6e1c66bc 547#define REAL_YSCALEYUV2PACKED1(index, c) \
2da0d70d
DB
548 "xor "#index", "#index" \n\t"\
549 ASMALIGN(4)\
550 "1: \n\t"\
551 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8b2fce0d 552 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
2da0d70d
DB
553 "psraw $7, %%mm3 \n\t" \
554 "psraw $7, %%mm4 \n\t" \
555 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
556 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
557 "psraw $7, %%mm1 \n\t" \
558 "psraw $7, %%mm7 \n\t" \
6a4970ab 559
6e1c66bc 560#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
6a4970ab 561
/*
 * REAL_YSCALEYUV2RGB1(index, c) / YSCALEYUV2RGB1(index, c)
 *
 * Single-row variant of YSCALEYUV2RGB: luma comes from %0 and chroma from %2
 * only, each sample shifted right by 4 (arithmetic) instead of being blended
 * with a second row. The colour conversion that follows is the same sequence
 * as in REAL_YSCALEYUV2RGB, with constants read relative to c.
 * Opens loop label "1:" (index zeroed first) and leaves it open.
 * Out: mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0.
 */
6e1c66bc 562#define REAL_YSCALEYUV2RGB1(index, c) \
2da0d70d
DB
563 "xor "#index", "#index" \n\t"\
564 ASMALIGN(4)\
565 "1: \n\t"\
566 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8b2fce0d 567 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
2da0d70d
DB
568 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4*/\
569 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4*/\
570 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
571 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
572 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
573 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
574 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
575 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
576 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
577 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
578 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
579 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
580 "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
581 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
582 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
583 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
584 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
585 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
586 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
587 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
588 "paddw %%mm3, %%mm4 \n\t"\
589 "movq %%mm2, %%mm0 \n\t"\
590 "movq %%mm5, %%mm6 \n\t"\
591 "movq %%mm4, %%mm3 \n\t"\
592 "punpcklwd %%mm2, %%mm2 \n\t"\
593 "punpcklwd %%mm5, %%mm5 \n\t"\
594 "punpcklwd %%mm4, %%mm4 \n\t"\
595 "paddw %%mm1, %%mm2 \n\t"\
596 "paddw %%mm1, %%mm5 \n\t"\
597 "paddw %%mm1, %%mm4 \n\t"\
598 "punpckhwd %%mm0, %%mm0 \n\t"\
599 "punpckhwd %%mm6, %%mm6 \n\t"\
600 "punpckhwd %%mm3, %%mm3 \n\t"\
601 "paddw %%mm7, %%mm0 \n\t"\
602 "paddw %%mm7, %%mm6 \n\t"\
603 "paddw %%mm7, %%mm3 \n\t"\
604 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
605 "packuswb %%mm0, %%mm2 \n\t"\
606 "packuswb %%mm6, %%mm5 \n\t"\
607 "packuswb %%mm3, %%mm4 \n\t"\
608 "pxor %%mm7, %%mm7 \n\t"
6e1c66bc 609#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
497d4f99 610
/*
 * REAL_YSCALEYUV2PACKED1b(index, c) / YSCALEYUV2PACKED1b(index, c)
 *
 * Like YSCALEYUV2PACKED1 for luma (%0 only, >>7 arithmetic), but chroma is
 * the sum of the two chroma rows (%2 and %3) shifted right by 8 with a
 * logical shift (psrlw). The parameter c is not used in the expansion.
 * Opens loop label "1:" (index zeroed first) and leaves it open.
 * Out: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (words).
 */
6e1c66bc 611#define REAL_YSCALEYUV2PACKED1b(index, c) \
2da0d70d
DB
612 "xor "#index", "#index" \n\t"\
613 ASMALIGN(4)\
614 "1: \n\t"\
615 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
616 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
617 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
618 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
619 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
620 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
621 "psrlw $8, %%mm3 \n\t" \
622 "psrlw $8, %%mm4 \n\t" \
623 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
624 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
625 "psraw $7, %%mm1 \n\t" \
626 "psraw $7, %%mm7 \n\t"
6e1c66bc 627#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
6a4970ab 628
/*
 * REAL_YSCALEYUV2RGB1b(index, c) / YSCALEYUV2RGB1b(index, c)
 *
 * Variant of YSCALEYUV2RGB1 in which chroma is the sum of the two chroma
 * rows (%2 and %3) shifted right by 5 with a logical shift (psrlw); the
 * original author flags that step as "might overflow". Luma is read from %0
 * only (>>4 arithmetic). Conversion constants are read relative to c.
 * Opens loop label "1:" (index zeroed first) and leaves it open.
 * Out: mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0.
 */
497d4f99 629// do vertical chrominance interpolation
6e1c66bc 630#define REAL_YSCALEYUV2RGB1b(index, c) \
2da0d70d
DB
631 "xor "#index", "#index" \n\t"\
632 ASMALIGN(4)\
633 "1: \n\t"\
634 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
635 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8b2fce0d
MN
636 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
637 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
2da0d70d
DB
638 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
639 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
640 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
641 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
642 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
643 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
644 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
645 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
646 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
647 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
648 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
649 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
650 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
651 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
652 "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
653 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
654 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
655 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
656 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
657 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
658 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
659 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
660 "paddw %%mm3, %%mm4 \n\t"\
661 "movq %%mm2, %%mm0 \n\t"\
662 "movq %%mm5, %%mm6 \n\t"\
663 "movq %%mm4, %%mm3 \n\t"\
664 "punpcklwd %%mm2, %%mm2 \n\t"\
665 "punpcklwd %%mm5, %%mm5 \n\t"\
666 "punpcklwd %%mm4, %%mm4 \n\t"\
667 "paddw %%mm1, %%mm2 \n\t"\
668 "paddw %%mm1, %%mm5 \n\t"\
669 "paddw %%mm1, %%mm4 \n\t"\
670 "punpckhwd %%mm0, %%mm0 \n\t"\
671 "punpckhwd %%mm6, %%mm6 \n\t"\
672 "punpckhwd %%mm3, %%mm3 \n\t"\
673 "paddw %%mm7, %%mm0 \n\t"\
674 "paddw %%mm7, %%mm6 \n\t"\
675 "paddw %%mm7, %%mm3 \n\t"\
676 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
677 "packuswb %%mm0, %%mm2 \n\t"\
678 "packuswb %%mm6, %%mm5 \n\t"\
679 "packuswb %%mm3, %%mm4 \n\t"\
680 "pxor %%mm7, %%mm7 \n\t"
6e1c66bc 681#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
d604bab9 682
/*
 * REAL_WRITEBGR32(dst, dstw, index) / WRITEBGR32(dst, dstw, index)
 *
 * Asm text that stores 8 pixels as 32-bit values and closes the loop opened
 * at label "1:" by one of the YSCALEYUV2RGB* macros.
 *   dst   - operand text: destination base register
 *   dstw  - operand text: loop bound
 *   index - operand text: pixel index register
 *
 * In : mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0
 * The bytes are interleaved into four quadwords of two "0RGB" dwords each and
 * stored with MOVNTQ at dst + 4*index + {0, 8, 16, 24}. index is then
 * advanced by 8 and the loop repeats while index < dstw (unsigned compare).
 * mm0, mm1, mm3 and mm6 are overwritten.
 *
 * WRITEBGR32 is the indirection that lets the arguments be macro-expanded
 * before they are stringified.
 */
6e1c66bc 683#define REAL_WRITEBGR32(dst, dstw, index) \
2da0d70d
DB
684 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
685 "movq %%mm2, %%mm1 \n\t" /* B */\
686 "movq %%mm5, %%mm6 \n\t" /* R */\
687 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
688 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
689 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
690 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
691 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
692 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
693 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
694 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
695 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
696 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
d604bab9 697\
2da0d70d
DB
698 MOVNTQ(%%mm0, (dst, index, 4))\
699 MOVNTQ(%%mm2, 8(dst, index, 4))\
700 MOVNTQ(%%mm1, 16(dst, index, 4))\
701 MOVNTQ(%%mm3, 24(dst, index, 4))\
d604bab9 702\
2da0d70d
DB
703 "add $8, "#index" \n\t"\
704 "cmp "#dstw", "#index" \n\t"\
705 " jb 1b \n\t"
6e1c66bc 706#define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index)
d604bab9 707
/*
 * REAL_WRITERGB16(dst, dstw, index) / WRITERGB16(dst, dstw, index)
 *
 * Asm text that stores 8 pixels as 16-bit values and closes the loop opened
 * at label "1:".
 *   dst, dstw, index - operand text: destination base, loop bound, pixel index
 *
 * In : mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0
 * B and R are masked with the global constant bF8 and G with bFC (referenced
 * through MANGLE()). B is shifted right by 3, G is widened to words and
 * shifted left by 3, R is interleaved into the high byte of each word, and
 * the parts are OR-ed together. Two MOVNTQ stores write 16 bytes at
 * dst + 2*index + {0, 8}. index is then advanced by 8 and the loop repeats
 * while index < dstw (unsigned compare). mm1 and mm3 are overwritten.
 *
 * WRITERGB16 is the argument-expanding wrapper.
 */
27a90b04 708#define REAL_WRITERGB16(dst, dstw, index) \
2da0d70d
DB
709 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
710 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
711 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
712 "psrlq $3, %%mm2 \n\t"\
d604bab9 713\
2da0d70d
DB
714 "movq %%mm2, %%mm1 \n\t"\
715 "movq %%mm4, %%mm3 \n\t"\
d604bab9 716\
2da0d70d
DB
717 "punpcklbw %%mm7, %%mm3 \n\t"\
718 "punpcklbw %%mm5, %%mm2 \n\t"\
719 "punpckhbw %%mm7, %%mm4 \n\t"\
720 "punpckhbw %%mm5, %%mm1 \n\t"\
d604bab9 721\
2da0d70d
DB
722 "psllq $3, %%mm3 \n\t"\
723 "psllq $3, %%mm4 \n\t"\
d604bab9 724\
2da0d70d
DB
725 "por %%mm3, %%mm2 \n\t"\
726 "por %%mm4, %%mm1 \n\t"\
d604bab9 727\
2da0d70d
DB
728 MOVNTQ(%%mm2, (dst, index, 2))\
729 MOVNTQ(%%mm1, 8(dst, index, 2))\
d604bab9 730\
2da0d70d
DB
731 "add $8, "#index" \n\t"\
732 "cmp "#dstw", "#index" \n\t"\
733 " jb 1b \n\t"
27a90b04 734#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
d604bab9 735
/*
 * REAL_WRITERGB15(dst, dstw, index) / WRITERGB15(dst, dstw, index)
 *
 * Same structure as REAL_WRITERGB16, with these differences: all three
 * channels are masked with bF8, R is additionally shifted right by 1, and G
 * is shifted left by 2 instead of 3.
 *
 * In : mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0
 * Two MOVNTQ stores write 16 bytes (8 pixels) at dst + 2*index + {0, 8}.
 * index is then advanced by 8 and the loop at label "1:" repeats while
 * index < dstw (unsigned compare). mm1 and mm3 are overwritten.
 *
 * WRITERGB15 is the argument-expanding wrapper.
 */
27a90b04 736#define REAL_WRITERGB15(dst, dstw, index) \
2da0d70d
DB
737 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
738 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
739 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
740 "psrlq $3, %%mm2 \n\t"\
741 "psrlq $1, %%mm5 \n\t"\
d604bab9 742\
2da0d70d
DB
743 "movq %%mm2, %%mm1 \n\t"\
744 "movq %%mm4, %%mm3 \n\t"\
d604bab9 745\
2da0d70d
DB
746 "punpcklbw %%mm7, %%mm3 \n\t"\
747 "punpcklbw %%mm5, %%mm2 \n\t"\
748 "punpckhbw %%mm7, %%mm4 \n\t"\
749 "punpckhbw %%mm5, %%mm1 \n\t"\
d604bab9 750\
2da0d70d
DB
751 "psllq $2, %%mm3 \n\t"\
752 "psllq $2, %%mm4 \n\t"\
d604bab9 753\
2da0d70d
DB
754 "por %%mm3, %%mm2 \n\t"\
755 "por %%mm4, %%mm1 \n\t"\
d604bab9 756\
2da0d70d
DB
757 MOVNTQ(%%mm2, (dst, index, 2))\
758 MOVNTQ(%%mm1, 8(dst, index, 2))\
d604bab9 759\
2da0d70d
DB
760 "add $8, "#index" \n\t"\
761 "cmp "#dstw", "#index" \n\t"\
762 " jb 1b \n\t"
27a90b04 763#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
f62255fb 764
/*
 * WRITEBGR24OLD(dst, dstw, index)
 *
 * Asm text that stores 8 pixels as 24 bytes and closes the loop opened at
 * label "1:".
 *   dst   - operand text: destination pointer register; it is advanced by 24
 *           on every pass ("add $24"), so the operand is modified
 *   dstw  - operand text: loop bound
 *   index - operand text: pixel index register, advanced by 8 per pass
 *
 * In : mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0
 * The first stage builds four quadwords of "0RGB" dwords exactly as
 * REAL_WRITEBGR32 does. The second stage removes the padding bytes with
 * shifts and the mask constants bm00000111, bm11111000 and bm00001111
 * (referenced through MANGLE()), giving three quadwords that are stored with
 * MOVNTQ at dst + {0, 8, 16}. All of mm0..mm6 are overwritten.
 * The loop repeats while index < dstw (unsigned compare).
 *
 * Unlike the other writers there is no REAL_/wrapper pair here: the
 * arguments are stringified directly by this macro.
 */
6542b44e 765#define WRITEBGR24OLD(dst, dstw, index) \
2da0d70d
DB
766 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
767 "movq %%mm2, %%mm1 \n\t" /* B */\
768 "movq %%mm5, %%mm6 \n\t" /* R */\
769 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
770 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
771 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
772 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
773 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
774 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
775 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
776 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
777 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
778 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
d604bab9 779\
2da0d70d
DB
780 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
781 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
782 "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 0 */\
783 "pand "MANGLE(bm11111000)", %%mm0 \n\t" /* 00RGB000 0.5 */\
784 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
785 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
786 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
787 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
d604bab9 788\
2da0d70d
DB
789 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
790 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
791 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
792 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
793 "pand "MANGLE(bm00001111)", %%mm2 \n\t" /* 0000RGBR 1 */\
794 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
795 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
796 "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 2 */\
797 "pand "MANGLE(bm11111000)", %%mm1 \n\t" /* 00RGB000 2.5 */\
798 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
799 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
800 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
801 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
d604bab9 802\
2da0d70d
DB
803 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
804 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
805 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
806 "pand "MANGLE(bm00000111)", %%mm5 \n\t" /* 00000RGB 3 */\
807 "pand "MANGLE(bm11111000)", %%mm3 \n\t" /* 00RGB000 3.5 */\
808 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
809 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
810 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
d604bab9 811\
2da0d70d
DB
812 MOVNTQ(%%mm0, (dst))\
813 MOVNTQ(%%mm2, 8(dst))\
814 MOVNTQ(%%mm3, 16(dst))\
815 "add $24, "#dst" \n\t"\
d604bab9 816\
2da0d70d
DB
817 "add $8, "#index" \n\t"\
818 "cmp "#dstw", "#index" \n\t"\
819 " jb 1b \n\t"
d604bab9 820
/*
 * WRITEBGR24MMX(dst, dstw, index): asm fragment that writes eight pixels as
 * 24 bytes of packed 3-byte pixels using only shifts, unpacks and ORs.
 * On entry mm2=B, mm4=G, mm5=R and mm7=0 (byte planes). The planes are
 * interleaved into four "0RGB0RGB" quadwords, each is compacted to
 * "0RGBRGB0" with psllq/punpckhdq, and the four results are stitched into
 * three quadwords that are stored at dst, dst+8 and dst+16.
 * mm7 is overwritten along the way, so it is no longer zero afterwards.
 * Afterwards dst is advanced by 24 bytes, index by 8, and control jumps back
 * to the local label "1" (not defined in this macro) while index < dstw.
 */
6542b44e 821#define WRITEBGR24MMX(dst, dstw, index) \
2da0d70d
DB
822 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
823 "movq %%mm2, %%mm1 \n\t" /* B */\
824 "movq %%mm5, %%mm6 \n\t" /* R */\
825 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
826 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
827 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
828 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
829 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
830 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
831 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
832 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
833 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
834 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
99d2cb72 835\
2da0d70d
DB
836 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
837 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
838 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
839 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
99d2cb72 840\
2da0d70d
DB
841 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
842 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
843 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
844 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
99d2cb72 845\
2da0d70d
DB
846 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
847 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
848 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
849 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
99d2cb72 850\
2da0d70d
DB
851 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
852 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
853 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
854 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
855 MOVNTQ(%%mm0, (dst))\
99d2cb72 856\
2da0d70d
DB
857 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
858 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
859 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
860 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
861 MOVNTQ(%%mm6, 8(dst))\
99d2cb72 862\
2da0d70d
DB
863 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
864 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
865 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
866 MOVNTQ(%%mm5, 16(dst))\
99d2cb72 867\
2da0d70d 868 "add $24, "#dst" \n\t"\
99d2cb72 869\
2da0d70d
DB
870 "add $8, "#index" \n\t"\
871 "cmp "#dstw", "#index" \n\t"\
872 " jb 1b \n\t"
99d2cb72 873
/*
 * WRITEBGR24MMX2(dst, dstw, index): asm fragment that writes eight pixels as
 * 24 bytes of packed 3-byte pixels using pshufw plus the byte-select masks
 * ff_M24A, ff_M24B and ff_M24C.
 * On entry mm2=B, mm4=G and mm5=R (byte planes). For each of the three output
 * quadwords the needed bytes of every plane are replicated with pshufw,
 * masked to their final byte positions and ORed together, then stored at
 * dst, dst+8 and dst+16.
 * mm0 and mm7 are loaded with masks and mm4 is shifted right by one byte, so
 * none of the incoming register contents besides mm2/mm5 survive.
 * Afterwards dst is advanced by 24 bytes, index by 8, and control jumps back
 * to the local label "1" (not defined in this macro) while index < dstw.
 */
6542b44e 874#define WRITEBGR24MMX2(dst, dstw, index) \
2da0d70d 875 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
5802683a
RD
876 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
877 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
2da0d70d
DB
878 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
879 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
880 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
99d2cb72 881\
2da0d70d
DB
882 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
883 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
884 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
99d2cb72 885\
2da0d70d
DB
886 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
887 "por %%mm1, %%mm6 \n\t"\
888 "por %%mm3, %%mm6 \n\t"\
889 MOVNTQ(%%mm6, (dst))\
99d2cb72 890\
2da0d70d
DB
891 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
892 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
893 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
894 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
99d2cb72 895\
5802683a 896 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
2da0d70d
DB
897 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
898 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
99d2cb72 899\
2da0d70d
DB
900 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
901 "por %%mm3, %%mm6 \n\t"\
902 MOVNTQ(%%mm6, 8(dst))\
99d2cb72 903\
2da0d70d
DB
904 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\
905 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
906 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
99d2cb72 907\
2da0d70d
DB
908 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
909 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
5802683a 910 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
99d2cb72 911\
2da0d70d
DB
912 "por %%mm1, %%mm3 \n\t"\
913 "por %%mm3, %%mm6 \n\t"\
914 MOVNTQ(%%mm6, 16(dst))\
99d2cb72 915\
2da0d70d 916 "add $24, "#dst" \n\t"\
99d2cb72 917\
2da0d70d
DB
918 "add $8, "#index" \n\t"\
919 "cmp "#dstw", "#index" \n\t"\
920 " jb 1b \n\t"
99d2cb72
MN
921
/*
 * Select the 24-bit writer for this instantiation of the template:
 * the pshufw based WRITEBGR24MMX2 when HAVE_MMX2 is defined, otherwise the
 * plain MMX version. The #undef comes first so that a definition left over
 * from an earlier inclusion of this template is replaced.
 */
922#ifdef HAVE_MMX2
7630f2e0 923#undef WRITEBGR24
6e1c66bc 924#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
99d2cb72 925#else
7630f2e0 926#undef WRITEBGR24
6e1c66bc 927#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
99d2cb72
MN
928#endif
929
/*
 * REAL_WRITEYUY2(dst, dstw, index): asm fragment that packs 16-bit samples
 * into bytes with unsigned saturation and stores 16 bytes at dst + 2*index.
 * mm1 and mm7 are packed together into eight bytes, mm3 and mm4 into four
 * bytes each; the stored byte order is
 *   mm1[0] mm3[0] mm1[1] mm4[0] mm1[2] mm3[1] mm1[3] mm4[1] ...
 * NOTE(review): this matches Y U Y V if mm1/mm7 hold luma, mm3 holds U and
 * mm4 holds V - the producers of these registers are not visible here.
 * Afterwards index is advanced by 8 and control jumps back to the local
 * label "1" (not defined in this macro) while index < dstw.
 * WRITEYUY2 is the extra expansion level that lets macro arguments be
 * expanded before REAL_WRITEYUY2 stringifies them with #.
 */
6e1c66bc 930#define REAL_WRITEYUY2(dst, dstw, index) \
2da0d70d
DB
931 "packuswb %%mm3, %%mm3 \n\t"\
932 "packuswb %%mm4, %%mm4 \n\t"\
933 "packuswb %%mm7, %%mm1 \n\t"\
934 "punpcklbw %%mm4, %%mm3 \n\t"\
935 "movq %%mm1, %%mm7 \n\t"\
936 "punpcklbw %%mm3, %%mm1 \n\t"\
937 "punpckhbw %%mm3, %%mm7 \n\t"\
25593e29 938\
2da0d70d
DB
939 MOVNTQ(%%mm1, (dst, index, 2))\
940 MOVNTQ(%%mm7, 8(dst, index, 2))\
25593e29 941\
2da0d70d
DB
942 "add $8, "#index" \n\t"\
943 "cmp "#dstw", "#index" \n\t"\
944 " jb 1b \n\t"
6e1c66bc 945#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
25593e29
MN
946
947
/**
 * Vertically filter the luma and chroma line buffers into planar output.
 *
 * With HAVE_MMX the work is done by the YSCALEYUV2YV12X macros (the
 * _ACCURATE variants when SWS_ACCURATE_RND is set in c->flags): once for
 * uDest and once for vDest when uDest is non-NULL, then once for dest.
 * Without MMX the call is forwarded unchanged to yuv2yuvX_altivec_real()
 * (HAVE_ALTIVEC) or yuv2yuvXinC().
 *
 * NOTE(review): the MMX branch does not reference lumFilter/lumSrc/chrFilter/
 * chrSrc directly; presumably the macros read pre-built filter tables from
 * the context at the *_MMX_FILTER_OFFSET offsets - confirm in the macros.
 *
 * @param c             scaler context (flags are read here)
 * @param lumFilter     luma filter coefficients
 * @param lumSrc        luma source line pointers
 * @param lumFilterSize number of luma filter taps
 * @param chrFilter     chroma filter coefficients
 * @param chrSrc        chroma source line pointers
 * @param chrFilterSize number of chroma filter taps
 * @param dest          luma output line
 * @param uDest         U output line; NULL skips chroma in the MMX branch
 * @param vDest         V output line
 * @param dstW          luma output width
 * @param chrDstW       chroma output width
 */
77a49659 948static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
2da0d70d
DB
949 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
950 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
38858470 951{
c1b0bfb4 952#ifdef HAVE_MMX
2da0d70d
DB
953 if (c->flags & SWS_ACCURATE_RND){
954 if (uDest){
8b2fce0d
MN
/* U is processed at offset "0", V at offset VOF of the same chroma filter data */
955 YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
956 YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
2da0d70d 957 }
bca11e75 958
8b2fce0d 959 YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
2da0d70d
DB
960 }else{
961 if (uDest){
8b2fce0d
MN
962 YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
963 YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
bca11e75 964 }
2da0d70d 965
8b2fce0d 966 YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
2da0d70d 967 }
c1b0bfb4 968#else
a2faa401
RD
969#ifdef HAVE_ALTIVEC
970yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
2da0d70d
DB
971 chrFilter, chrSrc, chrFilterSize,
972 dest, uDest, vDest, dstW, chrDstW);
a2faa401 973#else //HAVE_ALTIVEC
5859233b 974yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
2da0d70d
DB
975 chrFilter, chrSrc, chrFilterSize,
976 dest, uDest, vDest, dstW, chrDstW);
a2faa401 977#endif //!HAVE_ALTIVEC
bc279024 978#endif /* HAVE_MMX */
c1b0bfb4 979}
2add307d 980
/**
 * Vertical filter for the NV12-family output path.
 *
 * There is no platform specific version in this template: the call is
 * forwarded to yuv2nv12XinC() with every argument except the context c,
 * which is not used here.
 *
 * @param dest      luma output line
 * @param uDest     second output line passed through to yuv2nv12XinC()
 * @param dstFormat destination pixel format, passed through
 */
6118e52e 981static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
2da0d70d
DB
982 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
983 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
6118e52e
VS
984{
985yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
2da0d70d
DB
986 chrFilter, chrSrc, chrFilterSize,
987 dest, uDest, dstW, chrDstW, dstFormat);
6118e52e
VS
988}
989
/**
 * Convert one unscaled line of 16-bit intermediate samples to 8-bit planar
 * output (no vertical filtering).
 *
 * C path: every sample is rounded with (x + 64) >> 7 and clipped to 0..255.
 * Bit 8 of the shifted value is used as a cheap "out of range" test before
 * the exact comparisons are made. U is read from chrSrc[i], V from
 * chrSrc[i + VOFW]; chroma is skipped when uDest is NULL.
 *
 * MMX path: the planes are processed last to first (V, U, Y; only Y when
 * uDest is NULL). Each asm block is given the end of the source line, the
 * end of the destination line and the negated sample count as operands.
 *
 * @param c       scaler context (flags select the accurate-rounding macro)
 * @param lumSrc  luma samples, dstW entries
 * @param chrSrc  chroma samples, U at index 0 and V at index VOFW
 * @param dest    luma output line
 * @param uDest   U output line, or NULL to convert luma only
 * @param vDest   V output line
 * @param dstW    luma width in samples
 * @param chrDstW chroma width in samples
 */
bf2bdde6 990static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
2da0d70d 991 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
c1b0bfb4
MN
992{
993#ifdef HAVE_MMX
7bae01c6
MN
994 long p= uDest ? 3 : 1;
/* The sources are int16_t lines, so keep their element type: initialising
   uint8_t pointers from them is an incompatible pointer conversion. Only the
   address is handed to the asm, so the generated code does not change. */
995 int16_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
996 uint8_t *dst[3]= {dest, uDest, vDest};
997 long counter[3] = {dstW, chrDstW, chrDstW};
2da0d70d 998
bf2bdde6
MN
999 if (c->flags & SWS_ACCURATE_RND){
1000 while(p--){
1001 asm volatile(
1002 YSCALEYUV2YV121_ACCURATE
1003 :: "r" (src[p]), "r" (dst[p] + counter[p]),
1004 "g" (-counter[p])
1005 : "%"REG_a
1006 );
1007 }
1008 }else{
d78c1ea1
MN
1009 while(p--){
1010 asm volatile(
1011 YSCALEYUV2YV121
1012 :: "r" (src[p]), "r" (dst[p] + counter[p]),
1013 "g" (-counter[p])
1014 : "%"REG_a
1015 );
1016 }
bf2bdde6 1017 }
2da0d70d 1018
c1b0bfb4 1019#else
2da0d70d
DB
1020 int i;
1021 for (i=0; i<dstW; i++)
1022 {
a1f3ffa3 1023 int val= (lumSrc[i]+64)>>7;
2da0d70d
DB
1024
/* bit 8 set means the value left 0..255: negative or too large */
1025 if (val&256){
1026 if (val<0) val=0;
1027 else val=255;
1028 }
1029
1030 dest[i]= val;
1031 }
1032
1b0a4572 1033 if (uDest)
2da0d70d
DB
1034 for (i=0; i<chrDstW; i++)
1035 {
a1f3ffa3
MN
1036 int u=(chrSrc[i ]+64)>>7;
1037 int v=(chrSrc[i + VOFW]+64)>>7;
2da0d70d
DB
1038
/* one combined test for both components, exact clipping only when needed */
1039 if ((u|v)&256){
1040 if (u<0) u=0;
1041 else if (u>255) u=255;
1042 if (v<0) v=0;
1043 else if (v>255) v=255;
1044 }
1045
1046 uDest[i]= u;
1047 vDest[i]= v;
1048 }
c1b0bfb4 1049#endif
38858470
MN
1050}
1051
c1b0bfb4 1052
d604bab9
MN
1053/**
1054 * vertical scale YV12 to RGB
 *
 * With HAVE_MMX, the destination formats RGB32, BGR24, RGB555, RGB565 and
 * YUYV422 are written by inline asm assembled from the YSCALEYUV2PACKEDX*
 * macros, a colour conversion macro and one of the WRITE* macros; each of
 * those cases returns directly. The _ACCURATE scaler macro is used when
 * SWS_ACCURATE_RND is set in c->flags.
 * Every other format (and every build without MMX) falls through to
 * altivec_yuv2packedX() for the formats listed below when HAVE_ALTIVEC is
 * defined, and to yuv2packedXinC() otherwise.
 *
 * In the asm, operand %4 is dest and %5 is dstW; this is visible in the
 * BGR24 cases, which spell out the operand list themselves.
 * NOTE(review): the other cases end with YSCALEYUV2PACKEDX_END, presumably
 * the same operand/clobber list - confirm in the macro definition.
 *
 * @param c    scaler context (flags, dstFormat and redDither are read)
 * @param dest output line
 * @param dstW output width in pixels
 * @param dstY output line number, passed on to the fallbacks
1055 */
25593e29 1056static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
2da0d70d
DB
1057 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
1058 uint8_t *dest, long dstW, long dstY)
c1b0bfb4 1059{
bca11e75 1060#ifdef HAVE_MMX
f8d61128 1061 long dummy=0;
2da0d70d
DB
1062 if (c->flags & SWS_ACCURATE_RND){
1063 switch(c->dstFormat){
1064 case PIX_FMT_RGB32:
1065 YSCALEYUV2PACKEDX_ACCURATE
1066 YSCALEYUV2RGBX
1067 WRITEBGR32(%4, %5, %%REGa)
1068
1069 YSCALEYUV2PACKEDX_END
1070 return;
1071 case PIX_FMT_BGR24:
1072 YSCALEYUV2PACKEDX_ACCURATE
1073 YSCALEYUV2RGBX
/* REG_c = dest + 3*REG_a: byte address of the current pixel in 24-bit output */
1074 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1075 "add %4, %%"REG_c" \n\t"
1076 WRITEBGR24(%%REGc, %5, %%REGa)
1077
1078
1079 :: "r" (&c->redDither),
1080 "m" (dummy), "m" (dummy), "m" (dummy),
1081 "r" (dest), "m" (dstW)
1082 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1083 );
1084 return;
27a90b04 1085 case PIX_FMT_RGB555:
2da0d70d
DB
1086 YSCALEYUV2PACKEDX_ACCURATE
1087 YSCALEYUV2RGBX
1088 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bca11e75 1089#ifdef DITHER1XBPP
2da0d70d
DB
1090 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1091 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1092 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1093#endif
1094
27a90b04 1095 WRITERGB15(%4, %5, %%REGa)
2da0d70d
DB
1096 YSCALEYUV2PACKEDX_END
1097 return;
27a90b04 1098 case PIX_FMT_RGB565:
2da0d70d
DB
1099 YSCALEYUV2PACKEDX_ACCURATE
1100 YSCALEYUV2RGBX
1101 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bca11e75 1102#ifdef DITHER1XBPP
2da0d70d
DB
1103 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1104 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1105 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1106#endif
1107
27a90b04 1108 WRITERGB16(%4, %5, %%REGa)
2da0d70d
DB
1109 YSCALEYUV2PACKEDX_END
1110 return;
1111 case PIX_FMT_YUYV422:
1112 YSCALEYUV2PACKEDX_ACCURATE
1113 /* no RGB conversion here: mm1, mm3, mm4 and mm7 are scaled down and consumed by WRITEYUY2 */
1114
1115 "psraw $3, %%mm3 \n\t"
1116 "psraw $3, %%mm4 \n\t"
1117 "psraw $3, %%mm1 \n\t"
1118 "psraw $3, %%mm7 \n\t"
1119 WRITEYUY2(%4, %5, %%REGa)
1120 YSCALEYUV2PACKEDX_END
1121 return;
1122 }
bca11e75 1123 }else{
2da0d70d
DB
1124 switch(c->dstFormat)
1125 {
1126 case PIX_FMT_RGB32:
1127 YSCALEYUV2PACKEDX
1128 YSCALEYUV2RGBX
1129 WRITEBGR32(%4, %5, %%REGa)
1130 YSCALEYUV2PACKEDX_END
1131 return;
1132 case PIX_FMT_BGR24:
1133 YSCALEYUV2PACKEDX
1134 YSCALEYUV2RGBX
/* REG_c = dest + 3*REG_a: byte address of the current pixel in 24-bit output */
1135 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
1136 "add %4, %%"REG_c" \n\t"
1137 WRITEBGR24(%%REGc, %5, %%REGa)
1138
1139 :: "r" (&c->redDither),
1140 "m" (dummy), "m" (dummy), "m" (dummy),
1141 "r" (dest), "m" (dstW)
1142 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1143 );
1144 return;
27a90b04 1145 case PIX_FMT_RGB555:
2da0d70d
DB
1146 YSCALEYUV2PACKEDX
1147 YSCALEYUV2RGBX
1148 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
c1b0bfb4 1149#ifdef DITHER1XBPP
2da0d70d
DB
1150 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1151 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
1152 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1153#endif
1154
27a90b04 1155 WRITERGB15(%4, %5, %%REGa)
2da0d70d
DB
1156 YSCALEYUV2PACKEDX_END
1157 return;
27a90b04 1158 case PIX_FMT_RGB565:
2da0d70d
DB
1159 YSCALEYUV2PACKEDX
1160 YSCALEYUV2RGBX
1161 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
c1b0bfb4 1162#ifdef DITHER1XBPP
2da0d70d
DB
1163 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1164 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
1165 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1166#endif
1167
27a90b04 1168 WRITERGB16(%4, %5, %%REGa)
2da0d70d
DB
1169 YSCALEYUV2PACKEDX_END
1170 return;
1171 case PIX_FMT_YUYV422:
1172 YSCALEYUV2PACKEDX
1173 /* no RGB conversion here: mm1, mm3, mm4 and mm7 are scaled down and consumed by WRITEYUY2 */
1174
1175 "psraw $3, %%mm3 \n\t"
1176 "psraw $3, %%mm4 \n\t"
1177 "psraw $3, %%mm1 \n\t"
1178 "psraw $3, %%mm7 \n\t"
1179 WRITEYUY2(%4, %5, %%REGa)
1180 YSCALEYUV2PACKEDX_END
1181 return;
bca11e75
MN
1182 }
1183 }
bc279024 1184#endif /* HAVE_MMX */
a31de956 1185#ifdef HAVE_ALTIVEC
2da0d70d
DB
1186 /* The following list of supported dstFormat values should
1187 match what's found in the body of altivec_yuv2packedX() */
1188 if (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
1189 c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1190 c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)
1191 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
1192 chrFilter, chrSrc, chrFilterSize,
1193 dest, dstW, dstY);
1194 else
1195#endif
1196 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1197 chrFilter, chrSrc, chrFilterSize,
1198 dest, dstW, dstY);
c1b0bfb4
MN
1199}
1200
c1b0bfb4
MN
1201/**
1202 * vertical bilinear scale YV12 to RGB
 *
 * Blends two luma lines (buf0/buf1) and two chroma lines (uvbuf0/uvbuf1)
 * and writes one packed output line.
 *
 * The large "#if 0" section below is compiled out; it refers to names
 * (flags, dstFormat) that are not parameters of this function.
 *
 * With HAVE_MMX, the formats RGB32, BGR24, RGB555, RGB565 and YUYV422 are
 * handled by inline asm and return directly. Each asm block
 *  - saves REG_b into the context at ESP_OFFSET(%5) and reloads it at the
 *    end (%5 is &c->redDither),
 *  - loads dest into REG_b,
 *  - pushes REG_BP, uses it as the pixel index and pops it again,
 *  - passes 8280(%5) as the width operand, see the DSTW_OFFSET note below.
 * All other formats, and builds without MMX, are handled by the
 * YSCALE_YUV_2_ANYRGB_C macro at the end.
 *
 * @param c       scaler context
 * @param buf0    first luma line
 * @param buf1    second luma line
 * @param uvbuf0  first chroma line
 * @param uvbuf1  second chroma line
 * @param dest    output line
 * @param dstW    output width in pixels
 * @param yalpha  luma blend weight; yalpha1 is its complement (yalpha ^ 4095)
 * @param uvalpha chroma blend weight; uvalpha1 is its complement
 * @param y       output line number (not referenced in the code visible here)
1203 */
25593e29 1204static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
2da0d70d 1205 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
d604bab9 1206{
2da0d70d
DB
1207 int yalpha1=yalpha^4095;
1208 int uvalpha1=uvalpha^4095;
1209 int i;
d604bab9 1210
77a416e8 1211#if 0 //isn't used
2da0d70d
DB
1212 if (flags&SWS_FULL_CHR_H_INT)
1213 {
1214 switch(dstFormat)
1215 {
cf7d1c1a 1216#ifdef HAVE_MMX
2da0d70d
DB
1217 case PIX_FMT_RGB32:
1218 asm volatile(
d604bab9
MN
1219
1220
1221FULL_YSCALEYUV2RGB
2da0d70d
DB
1222 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
1223 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
d604bab9 1224
2da0d70d
DB
1225 "movq %%mm3, %%mm1 \n\t"
1226 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
1227 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
d604bab9 1228
2da0d70d
DB
1229 MOVNTQ(%%mm3, (%4, %%REGa, 4))
1230 MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
d604bab9 1231
2da0d70d
DB
1232 "add $4, %%"REG_a" \n\t"
1233 "cmp %5, %%"REG_a" \n\t"
1234 " jb 1b \n\t"
d604bab9 1235
2da0d70d
DB
1236 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
1237 "m" (yalpha1), "m" (uvalpha1)
1238 : "%"REG_a
1239 );
1240 break;
1241 case PIX_FMT_BGR24:
1242 asm volatile(
d604bab9
MN
1243
1244FULL_YSCALEYUV2RGB
1245
2da0d70d
DB
1246 // lsb ... msb
1247 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
1248 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
d604bab9 1249
2da0d70d
DB
1250 "movq %%mm3, %%mm1 \n\t"
1251 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
1252 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
d604bab9 1253
2da0d70d
DB
1254 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0
1255 "psrlq $8, %%mm3 \n\t" // GR0BGR00
1256 "pand "MANGLE(bm00000111)", %%mm2 \n\t" // BGR00000
1257 "pand "MANGLE(bm11111000)", %%mm3 \n\t" // 000BGR00
1258 "por %%mm2, %%mm3 \n\t" // BGRBGR00
1259 "movq %%mm1, %%mm2 \n\t"
1260 "psllq $48, %%mm1 \n\t" // 000000BG
1261 "por %%mm1, %%mm3 \n\t" // BGRBGRBG
d604bab9 1262
2da0d70d
DB
1263 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0
1264 "psrld $16, %%mm2 \n\t" // R000R000
1265 "psrlq $24, %%mm1 \n\t" // 0BGR0000
1266 "por %%mm2, %%mm1 \n\t" // RBGRR000
d604bab9 1267
2da0d70d
DB
1268 "mov %4, %%"REG_b" \n\t"
1269 "add %%"REG_a", %%"REG_b" \n\t"
d604bab9
MN
1270
1271#ifdef HAVE_MMX2
2da0d70d
DB
1272 //FIXME Alignment
1273 "movntq %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t"
1274 "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t"
d604bab9 1275#else
2da0d70d
DB
1276 "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t"
1277 "psrlq $32, %%mm3 \n\t"
1278 "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t"
1279 "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t"
1280#endif
1281 "add $4, %%"REG_a" \n\t"
1282 "cmp %5, %%"REG_a" \n\t"
1283 " jb 1b \n\t"
1284
1285 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
1286 "m" (yalpha1), "m" (uvalpha1)
1287 : "%"REG_a, "%"REG_b
1288 );
1289 break;
1290 case PIX_FMT_BGR555:
1291 asm volatile(
d604bab9
MN
1292
1293FULL_YSCALEYUV2RGB
1294#ifdef DITHER1XBPP
2da0d70d
DB
1295 "paddusb "MANGLE(g5Dither)", %%mm1 \n\t"
1296 "paddusb "MANGLE(r5Dither)", %%mm0 \n\t"
1297 "paddusb "MANGLE(b5Dither)", %%mm3 \n\t"
d604bab9 1298#endif
2da0d70d
DB
1299 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
1300 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
1301 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
d604bab9 1302
2da0d70d
DB
1303 "psrlw $3, %%mm3 \n\t"
1304 "psllw $2, %%mm1 \n\t"
1305 "psllw $7, %%mm0 \n\t"
1306 "pand "MANGLE(g15Mask)", %%mm1 \n\t"
1307 "pand "MANGLE(r15Mask)", %%mm0 \n\t"
d604bab9 1308
2da0d70d
DB
1309 "por %%mm3, %%mm1 \n\t"
1310 "por %%mm1, %%mm0 \n\t"
d604bab9 1311
2da0d70d 1312 MOVNTQ(%%mm0, (%4, %%REGa, 2))
d604bab9 1313
2da0d70d
DB
1314 "add $4, %%"REG_a" \n\t"
1315 "cmp %5, %%"REG_a" \n\t"
1316 " jb 1b \n\t"
d604bab9 1317
2da0d70d
DB
1318 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1319 "m" (yalpha1), "m" (uvalpha1)
1320 : "%"REG_a
1321 );
1322 break;
1323 case PIX_FMT_BGR565:
1324 asm volatile(
d604bab9
MN
1325
1326FULL_YSCALEYUV2RGB
1327#ifdef DITHER1XBPP
2da0d70d
DB
1328 "paddusb "MANGLE(g6Dither)", %%mm1 \n\t"
1329 "paddusb "MANGLE(r5Dither)", %%mm0 \n\t"
1330 "paddusb "MANGLE(b5Dither)", %%mm3 \n\t"
d604bab9 1331#endif
2da0d70d
DB
1332 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
1333 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
1334 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
d604bab9 1335
2da0d70d
DB
1336 "psrlw $3, %%mm3 \n\t"
1337 "psllw $3, %%mm1 \n\t"
1338 "psllw $8, %%mm0 \n\t"
1339 "pand "MANGLE(g16Mask)", %%mm1 \n\t"
1340 "pand "MANGLE(r16Mask)", %%mm0 \n\t"
d604bab9 1341
2da0d70d
DB
1342 "por %%mm3, %%mm1 \n\t"
1343 "por %%mm1, %%mm0 \n\t"
d604bab9 1344
2da0d70d 1345 MOVNTQ(%%mm0, (%4, %%REGa, 2))
d604bab9 1346
2da0d70d
DB
1347 "add $4, %%"REG_a" \n\t"
1348 "cmp %5, %%"REG_a" \n\t"
1349 " jb 1b \n\t"
d604bab9 1350
2da0d70d
DB
1351 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1352 "m" (yalpha1), "m" (uvalpha1)
1353 : "%"REG_a
1354 );
1355 break;
bc279024 1356#endif /* HAVE_MMX */
2da0d70d 1357 case PIX_FMT_BGR32:
cf7d1c1a 1358#ifndef HAVE_MMX
2da0d70d 1359 case PIX_FMT_RGB32:
cf7d1c1a 1360#endif
2da0d70d
DB
1361 if (dstFormat==PIX_FMT_RGB32)
1362 {
1363 int i;
df3c183a 1364#ifdef WORDS_BIGENDIAN
2da0d70d
DB
1365 dest++;
1366#endif
1367 for (i=0;i<dstW;i++){
1368 // vertical linear interpolation && yuv2rgb in a single step:
1369 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1370 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8b2fce0d 1371 int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19);
2da0d70d
DB
1372 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
1373 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
1374 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
1375 dest+= 4;
1376 }
1377 }
1378 else if (dstFormat==PIX_FMT_BGR24)
1379 {
1380 int i;
1381 for (i=0;i<dstW;i++){
1382 // vertical linear interpolation && yuv2rgb in a single step:
1383 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1384 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8b2fce0d 1385 int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19);
2da0d70d
DB
1386 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
1387 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
1388 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
1389 dest+= 3;
1390 }
1391 }
1392 else if (dstFormat==PIX_FMT_BGR565)
1393 {
1394 int i;
1395 for (i=0;i<dstW;i++){
1396 // vertical linear interpolation && yuv2rgb in a single step:
1397 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1398 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8b2fce0d 1399 int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19);
2da0d70d
DB
1400
1401 ((uint16_t*)dest)[i] =
1402 clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
1403 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
1404 clip_table16r[(Y + yuvtab_3343[V]) >>13];
1405 }
1406 }
1407 else if (dstFormat==PIX_FMT_BGR555)
1408 {
1409 int i;
1410 for (i=0;i<dstW;i++){
1411 // vertical linear interpolation && yuv2rgb in a single step:
1412 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1413 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8b2fce0d 1414 int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19);
2da0d70d
DB
1415
1416 ((uint16_t*)dest)[i] =
1417 clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
1418 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
1419 clip_table15r[(Y + yuvtab_3343[V]) >>13];
1420 }
1421 }
1422 }//FULL_UV_IPOL
1423 else
1424 {
cf7d1c1a 1425#endif // if 0
d604bab9 1426#ifdef HAVE_MMX
2da0d70d
DB
1427 switch(c->dstFormat)
1428 {
1429 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1430 case PIX_FMT_RGB32:
1431 asm volatile(
/* park REG_b in the context, then use it for dest; REG_BP becomes the index */
1432 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1433 "mov %4, %%"REG_b" \n\t"
1434 "push %%"REG_BP" \n\t"
1435 YSCALEYUV2RGB(%%REGBP, %5)
1436 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1437 "pop %%"REG_BP" \n\t"
1438 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1439
1440 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1441 "a" (&c->redDither)
1442 );
1443 return;
1444 case PIX_FMT_BGR24:
1445 asm volatile(
1446 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1447 "mov %4, %%"REG_b" \n\t"
1448 "push %%"REG_BP" \n\t"
1449 YSCALEYUV2RGB(%%REGBP, %5)
1450 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1451 "pop %%"REG_BP" \n\t"
1452 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1453 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1454 "a" (&c->redDither)
1455 );
1456 return;
27a90b04 1457 case PIX_FMT_RGB555:
2da0d70d
DB
1458 asm volatile(
1459 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1460 "mov %4, %%"REG_b" \n\t"
1461 "push %%"REG_BP" \n\t"
1462 YSCALEYUV2RGB(%%REGBP, %5)
1463 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1464#ifdef DITHER1XBPP
2da0d70d
DB
1465 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1466 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
1467 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1468#endif
1469
27a90b04 1470 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1471 "pop %%"REG_BP" \n\t"
1472 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1473
1474 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1475 "a" (&c->redDither)
1476 );
1477 return;
27a90b04 1478 case PIX_FMT_RGB565:
2da0d70d
DB
1479 asm volatile(
1480 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1481 "mov %4, %%"REG_b" \n\t"
1482 "push %%"REG_BP" \n\t"
1483 YSCALEYUV2RGB(%%REGBP, %5)
1484 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1485#ifdef DITHER1XBPP
2da0d70d
DB
1486 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1487 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
1488 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1489#endif
1490
27a90b04 1491 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1492 "pop %%"REG_BP" \n\t"
1493 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1494 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1495 "a" (&c->redDither)
1496 );
1497 return;
1498 case PIX_FMT_YUYV422:
1499 asm volatile(
1500 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1501 "mov %4, %%"REG_b" \n\t"
1502 "push %%"REG_BP" \n\t"
1503 YSCALEYUV2PACKED(%%REGBP, %5)
1504 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1505 "pop %%"REG_BP" \n\t"
1506 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1507 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1508 "a" (&c->redDither)
1509 );
1510 return;
1511 default: break;
1512 }
cf7d1c1a 1513#endif //HAVE_MMX
/* generic C implementation for every format not returned from above */
b0880d5d 1514YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C)
d604bab9
MN
1515}
1516
1517/**
1518 * YV12 to RGB without scaling or interpolating
 *
 * Writes one packed output line from a single luma line (buf0) and the
 * chroma lines uvbuf0/uvbuf1.
 *
 * When SWS_FULL_CHR_H_INT is set in flags the call is handed to
 * yuv2packed2() with buf0 used for both luma lines and a luma weight of 0.
 *
 * With HAVE_MMX, the formats RGB32, BGR24, RGB555, RGB565 and YUYV422 are
 * handled by inline asm and return directly. uvalpha < 2048 selects the
 * YSCALEYUV2RGB1 / YSCALEYUV2PACKED1 macros, otherwise the "1b" variants are
 * used. The register protocol is the same in every block: REG_b is parked in
 * the context at ESP_OFFSET(%5) and reloaded at the end, dest is loaded into
 * REG_b, REG_BP is pushed, used as the pixel index and popped, and 8280(%5)
 * is passed as the width operand (%5 is &c->redDither).
 * NOTE(review): 8280 is presumably DSTW_OFFSET, as noted in yuv2packed2 -
 * confirm against the context layout.
 *
 * Everything else is handled by YSCALE_YUV_2_ANYRGB_C at the end, again
 * split on uvalpha < 2048.
 *
 * @param c         scaler context
 * @param buf0      luma line
 * @param uvbuf0    first chroma line
 * @param uvbuf1    second chroma line
 * @param dest      output line
 * @param dstW      output width in pixels
 * @param uvalpha   chroma blend weight, selects the code path
 * @param dstFormat destination pixel format
 * @param flags     scaler flags
 * @param y         output line number, passed on to yuv2packed2()
1519 */
25593e29 1520static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
2da0d70d 1521 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
d604bab9 1522{
2da0d70d
DB
1523 const int yalpha1=0;
1524 int i;
6a4970ab 1525
8a322796 1526 uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
2da0d70d 1527 const int yalpha= 4096; //FIXME ...
96034638 1528
2da0d70d
DB
1529 if (flags&SWS_FULL_CHR_H_INT)
1530 {
1531 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
1532 return;
1533 }
397c035e
MN
1534
1535#ifdef HAVE_MMX
e5091488 1536 if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
2da0d70d
DB
1537 {
1538 switch(dstFormat)
1539 {
1540 case PIX_FMT_RGB32:
1541 asm volatile(
/* park REG_b in the context, then use it for dest; REG_BP becomes the index */
1542 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1543 "mov %4, %%"REG_b" \n\t"
1544 "push %%"REG_BP" \n\t"
1545 YSCALEYUV2RGB1(%%REGBP, %5)
1546 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1547 "pop %%"REG_BP" \n\t"
1548 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1549
1550 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1551 "a" (&c->redDither)
1552 );
1553 return;
1554 case PIX_FMT_BGR24:
1555 asm volatile(
1556 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1557 "mov %4, %%"REG_b" \n\t"
1558 "push %%"REG_BP" \n\t"
1559 YSCALEYUV2RGB1(%%REGBP, %5)
1560 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1561 "pop %%"REG_BP" \n\t"
1562 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1563
1564 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1565 "a" (&c->redDither)
1566 );
1567 return;
27a90b04 1568 case PIX_FMT_RGB555:
2da0d70d
DB
1569 asm volatile(
1570 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1571 "mov %4, %%"REG_b" \n\t"
1572 "push %%"REG_BP" \n\t"
1573 YSCALEYUV2RGB1(%%REGBP, %5)
1574 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1575#ifdef DITHER1XBPP
2da0d70d
DB
1576 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1577 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
1578 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1579#endif
27a90b04 1580 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1581 "pop %%"REG_BP" \n\t"
1582 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1583
1584 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1585 "a" (&c->redDither)
1586 );
1587 return;
27a90b04 1588 case PIX_FMT_RGB565:
2da0d70d
DB
1589 asm volatile(
1590 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1591 "mov %4, %%"REG_b" \n\t"
1592 "push %%"REG_BP" \n\t"
1593 YSCALEYUV2RGB1(%%REGBP, %5)
1594 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
d604bab9 1595#ifdef DITHER1XBPP
2da0d70d
DB
1596 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1597 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
1598 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1599#endif
1600
27a90b04 1601 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1602 "pop %%"REG_BP" \n\t"
1603 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1604
1605 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1606 "a" (&c->redDither)
1607 );
1608 return;
1609 case PIX_FMT_YUYV422:
1610 asm volatile(
1611 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1612 "mov %4, %%"REG_b" \n\t"
1613 "push %%"REG_BP" \n\t"
1614 YSCALEYUV2PACKED1(%%REGBP, %5)
1615 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1616 "pop %%"REG_BP" \n\t"
1617 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1618
1619 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1620 "a" (&c->redDither)
1621 );
1622 return;
1623 }
1624 }
1625 else
1626 {
1627 switch(dstFormat)
1628 {
1629 case PIX_FMT_RGB32:
1630 asm volatile(
1631 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1632 "mov %4, %%"REG_b" \n\t"
1633 "push %%"REG_BP" \n\t"
1634 YSCALEYUV2RGB1b(%%REGBP, %5)
1635 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1636 "pop %%"REG_BP" \n\t"
1637 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1638
1639 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1640 "a" (&c->redDither)
1641 );
1642 return;
1643 case PIX_FMT_BGR24:
1644 asm volatile(
1645 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1646 "mov %4, %%"REG_b" \n\t"
1647 "push %%"REG_BP" \n\t"
1648 YSCALEYUV2RGB1b(%%REGBP, %5)
1649 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1650 "pop %%"REG_BP" \n\t"
1651 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1652
1653 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1654 "a" (&c->redDither)
1655 );
1656 return;
27a90b04 1657 case PIX_FMT_RGB555:
2da0d70d
DB
1658 asm volatile(
1659 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1660 "mov %4, %%"REG_b" \n\t"
1661 "push %%"REG_BP" \n\t"
1662 YSCALEYUV2RGB1b(%%REGBP, %5)
1663 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
497d4f99 1664#ifdef DITHER1XBPP
2da0d70d
DB
1665 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1666 "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
1667 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1668#endif
27a90b04 1669 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1670 "pop %%"REG_BP" \n\t"
1671 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1672
1673 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1674 "a" (&c->redDither)
1675 );
1676 return;
27a90b04 1677 case PIX_FMT_RGB565:
2da0d70d
DB
1678 asm volatile(
1679 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1680 "mov %4, %%"REG_b" \n\t"
1681 "push %%"REG_BP" \n\t"
1682 YSCALEYUV2RGB1b(%%REGBP, %5)
1683 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
497d4f99 1684#ifdef DITHER1XBPP
2da0d70d
DB
1685 "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
1686 "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
1687 "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
1688#endif
1689
27a90b04 1690 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
2da0d70d
DB
1691 "pop %%"REG_BP" \n\t"
1692 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1693
1694 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1695 "a" (&c->redDither)
1696 );
1697 return;
1698 case PIX_FMT_YUYV422:
1699 asm volatile(
1700 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1701 "mov %4, %%"REG_b" \n\t"
1702 "push %%"REG_BP" \n\t"
1703 YSCALEYUV2PACKED1b(%%REGBP, %5)
1704 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1705 "pop %%"REG_BP" \n\t"
1706 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1707
1708 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1709 "a" (&c->redDither)
1710 );
1711 return;
1712 }
1713 }
bc279024 1714#endif /* HAVE_MMX */
/* generic C implementation for every format not returned from above */
e5091488 1715 if (uvalpha < 2048)
2da0d70d 1716 {
b0880d5d 1717 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C)
2da0d70d 1718 }else{
b0880d5d 1719 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C)
2da0d70d 1720 }
d604bab9
MN
1721}
1722
8a322796 1723//FIXME yuy2* can read up to 7 samples too many
6ff0ad6b 1724
/**
 * Copies every second byte of src, starting with byte 0, into dst:
 * dst[i] = src[2*i] for 0 <= i < width.
 *
 * With HAVE_MMX the work is done 8 output bytes per loop iteration; the
 * index register starts at -width and is advanced by 8 until it is no longer
 * negative, so whole 8-byte groups are loaded and stored even when width is
 * not a multiple of 8.
 *
 * @param dst   destination, one byte per sample
 * @param src   source, two bytes per sample
 * @param width number of samples to produce
 */
7f526efd 1725static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
1e621b18 1726{
6ff0ad6b 1727#ifdef HAVE_MMX
2da0d70d
DB
/* %0 = -width, %1 = src + 2*width, %2 = dst + width; REG_a counts up from -width to 0.
   NOTE(review): bm01010101 is presumably a mask keeping the low byte of each 16-bit word -- confirm. */
1728 asm volatile(
1729 "movq "MANGLE(bm01010101)", %%mm2 \n\t"
1730 "mov %0, %%"REG_a" \n\t"
1731 "1: \n\t"
1732 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1733 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1734 "pand %%mm2, %%mm0 \n\t"
1735 "pand %%mm2, %%mm1 \n\t"
1736 "packuswb %%mm1, %%mm0 \n\t"
1737 "movq %%mm0, (%2, %%"REG_a") \n\t"
1738 "add $8, %%"REG_a" \n\t"
1739 " js 1b \n\t"
1740 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1741 : "%"REG_a
1742 );
1e621b18 1743#else
2da0d70d
DB
/* Portable fallback: take the even-indexed bytes. */
1744 int i;
1745 for (i=0; i<width; i++)
1746 dst[i]= src[2*i];
1e621b18
MN
1747#endif
1748}
1749
/**
 * De-interleaves the odd-indexed bytes of src1 into two planes:
 * dstU[i] = src1[4*i + 1] and dstV[i] = src1[4*i + 3] for 0 <= i < width.
 *
 * src2 is not read; it is only checked (assert) to be equal to src1.
 * With HAVE_MMX four U and four V bytes are produced per loop iteration.
 *
 * @param dstU  destination for the bytes at offset 1 of each 4-byte group
 * @param dstV  destination for the bytes at offset 3 of each 4-byte group
 * @param src1  source, four bytes per output sample
 * @param src2  must equal src1
 * @param width number of U (and V) samples to produce
 */
7f526efd 1750static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1e621b18 1751{
c2271987 1752#ifdef HAVE_MMX
2da0d70d
DB
/* %0 = -width, %1 = src1 + 4*width, %2 = dstU + width, %3 = dstV + width.
   psrlw $8 + packuswb keeps the odd bytes; these are then split into their
   even members (stored through %2) and odd members (stored through %3). */
1753 asm volatile(
1754 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1755 "mov %0, %%"REG_a" \n\t"
1756 "1: \n\t"
1757 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
1758 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
1759 "psrlw $8, %%mm0 \n\t"
1760 "psrlw $8, %%mm1 \n\t"
1761 "packuswb %%mm1, %%mm0 \n\t"
1762 "movq %%mm0, %%mm1 \n\t"
1763 "psrlw $8, %%mm0 \n\t"
1764 "pand %%mm4, %%mm1 \n\t"
1765 "packuswb %%mm0, %%mm0 \n\t"
1766 "packuswb %%mm1, %%mm1 \n\t"
1767 "movd %%mm0, (%3, %%"REG_a") \n\t"
1768 "movd %%mm1, (%2, %%"REG_a") \n\t"
1769 "add $4, %%"REG_a" \n\t"
1770 " js 1b \n\t"
1771 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1772 : "%"REG_a
1773 );
1e621b18 1774#else
2da0d70d
DB
1775 int i;
1776 for (i=0; i<width; i++)
1777 {
1778 dstU[i]= src1[4*i + 1];
1779 dstV[i]= src1[4*i + 3];
1780 }
1781#endif
/* The check runs after the conversion; src2 itself is never dereferenced. */
1782 assert(src1 == src2);
1e621b18
MN
1783}
1784
4cf16bbe
DB
1785/* This is almost identical to the previous, and exists only because
1786 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/**
 * Copies every second byte of src, starting with byte 1, into dst:
 * dst[i] = src[2*i + 1] for 0 <= i < width.
 *
 * With HAVE_MMX the work is done 8 output bytes per loop iteration (each
 * 16-bit word is shifted right by 8 and the results are packed), so whole
 * 8-byte groups are loaded and stored even when width is not a multiple of 8.
 *
 * @param dst   destination, one byte per sample
 * @param src   source, two bytes per sample
 * @param width number of samples to produce
 */
7f526efd 1787static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
7322a67c
MN
1788{
1789#ifdef HAVE_MMX
2da0d70d
DB
/* %0 = -width, %1 = src + 2*width, %2 = dst + width; REG_a counts up from -width to 0. */
1790 asm volatile(
1791 "mov %0, %%"REG_a" \n\t"
1792 "1: \n\t"
1793 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
1794 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
1795 "psrlw $8, %%mm0 \n\t"
1796 "psrlw $8, %%mm1 \n\t"
1797 "packuswb %%mm1, %%mm0 \n\t"
1798 "movq %%mm0, (%2, %%"REG_a") \n\t"
1799 "add $8, %%"REG_a" \n\t"
1800 " js 1b \n\t"
1801 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1802 : "%"REG_a
1803 );
7322a67c 1804#else
2da0d70d
DB
/* Portable fallback: take the odd-indexed bytes. */
1805 int i;
1806 for (i=0; i<width; i++)
1807 dst[i]= src[2*i+1];
7322a67c
MN
1808#endif
1809}
1810
/**
 * De-interleaves the even-indexed bytes of src1 into two planes:
 * dstU[i] = src1[4*i + 0] and dstV[i] = src1[4*i + 2] for 0 <= i < width.
 *
 * src2 is not read; it is only checked (assert) to be equal to src1.
 * With HAVE_MMX four U and four V bytes are produced per loop iteration.
 *
 * @param dstU  destination for the bytes at offset 0 of each 4-byte group
 * @param dstV  destination for the bytes at offset 2 of each 4-byte group
 * @param src1  source, four bytes per output sample
 * @param src2  must equal src1
 * @param width number of U (and V) samples to produce
 */
7f526efd 1811static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
7322a67c 1812{
c2271987 1813#ifdef HAVE_MMX
2da0d70d
DB
/* %0 = -width, %1 = src1 + 4*width, %2 = dstU + width, %3 = dstV + width.
   pand + packuswb keeps the even bytes; these are then split into their
   even members (stored through %2) and odd members (stored through %3). */
1814 asm volatile(
1815 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
1816 "mov %0, %%"REG_a" \n\t"
1817 "1: \n\t"
1818 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
1819 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
1820 "pand %%mm4, %%mm0 \n\t"
1821 "pand %%mm4, %%mm1 \n\t"
1822 "packuswb %%mm1, %%mm0 \n\t"
1823 "movq %%mm0, %%mm1 \n\t"
1824 "psrlw $8, %%mm0 \n\t"
1825 "pand %%mm4, %%mm1 \n\t"
1826 "packuswb %%mm0, %%mm0 \n\t"
1827 "packuswb %%mm1, %%mm1 \n\t"
1828 "movd %%mm0, (%3, %%"REG_a") \n\t"
1829 "movd %%mm1, (%2, %%"REG_a") \n\t"
1830 "add $4, %%"REG_a" \n\t"
1831 " js 1b \n\t"
1832 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1833 : "%"REG_a
1834 );
7322a67c 1835#else
2da0d70d
DB
1836 int i;
1837 for (i=0; i<width; i++)
1838 {
1839 dstU[i]= src1[4*i + 0];
1840 dstV[i]= src1[4*i + 2];
1841 }
1842#endif
/* The check runs after the conversion; src2 itself is never dereferenced. */
1843 assert(src1 == src2);
7322a67c
MN
1844}
1845
97b93389 1846static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, long width)
1e621b18 1847{
2da0d70d
DB
1848 int i;
1849 for (i=0; i<width; i++)
1850 {
1851 int b= ((uint32_t*)src)[i]&0xFF;
1852 int g= (((uint32_t*)src)[i]>>8)&0xFF;
1853 int r= (((uint32_t*)src)[i]>>16)&0xFF;
1854
e5091488 1855 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 1856 }
1e621b18
MN
1857}
1858
97b93389 1859static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1e621b18 1860{
2da0d70d
DB
1861 int i;
1862 assert(src1 == src2);
1863 for (i=0; i<width; i++)
1864 {
1865 const int a= ((uint32_t*)src1)[2*i+0];
1866 const int e= ((uint32_t*)src1)[2*i+1];
1867 const int l= (a&0xFF00FF) + (e&0xFF00FF);
1868 const int h= (a&0x00FF00) + (e&0x00FF00);
1869 const int b= l&0x3FF;
1870 const int g= h>>8;
1871 const int r= l>>16;
1872
35ab2b64
MN
1873 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1874 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
2da0d70d 1875 }
1e621b18
MN
1876}
1877
/**
 * Computes one 8-bit Y sample per 24-bit source pixel.
 *
 * Reference (C) behaviour: for each pixel, byte 0 is blue, byte 1 green and
 * byte 2 red, and
 *   dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT.
 *
 * With HAVE_MMX eight pixels (24 source bytes) are converted per loop
 * iteration using the ff_bgr2YCoeff / ff_w1111 / ff_bgr2YOffset constants,
 * which are defined outside this function.
 *
 * @param dst   output, receives width bytes
 * @param src   input, three bytes per pixel
 * @param width number of pixels to convert
 */
7f526efd 1878static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
1e621b18 1879{
ac6a2e45 1880#ifdef HAVE_MMX
2da0d70d
DB
/* %0 = src + 3*width, %1 = dst + width, %2 = -width.
   REG_a is the (negative) output index, REG_d = 3*REG_a the source byte offset. */
1881 asm volatile(
1882 "mov %2, %%"REG_a" \n\t"
5802683a
RD
1883 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
1884 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
2da0d70d
DB
1885 "pxor %%mm7, %%mm7 \n\t"
1886 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
1887 ASMALIGN(4)
1888 "1: \n\t"
1889 PREFETCH" 64(%0, %%"REG_d") \n\t"
1890 "movd (%0, %%"REG_d"), %%mm0 \n\t"
1891 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
1892 "punpcklbw %%mm7, %%mm0 \n\t"
1893 "punpcklbw %%mm7, %%mm1 \n\t"
1894 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
1895 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
1896 "punpcklbw %%mm7, %%mm2 \n\t"
1897 "punpcklbw %%mm7, %%mm3 \n\t"
1898 "pmaddwd %%mm6, %%mm0 \n\t"
1899 "pmaddwd %%mm6, %%mm1 \n\t"
1900 "pmaddwd %%mm6, %%mm2 \n\t"
1901 "pmaddwd %%mm6, %%mm3 \n\t"
ac6a2e45 1902#ifndef FAST_BGR2YV12
2da0d70d
DB
1903 "psrad $8, %%mm0 \n\t"
1904 "psrad $8, %%mm1 \n\t"
1905 "psrad $8, %%mm2 \n\t"
1906 "psrad $8, %%mm3 \n\t"
1907#endif
1908 "packssdw %%mm1, %%mm0 \n\t"
1909 "packssdw %%mm3, %%mm2 \n\t"
1910 "pmaddwd %%mm5, %%mm0 \n\t"
1911 "pmaddwd %%mm5, %%mm2 \n\t"
1912 "packssdw %%mm2, %%mm0 \n\t"
1913 "psraw $7, %%mm0 \n\t"
1914
/* Second group of four pixels (source bytes 12..23 of this iteration). */
1915 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
1916 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
1917 "punpcklbw %%mm7, %%mm4 \n\t"
1918 "punpcklbw %%mm7, %%mm1 \n\t"
1919 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
1920 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
1921 "punpcklbw %%mm7, %%mm2 \n\t"
1922 "punpcklbw %%mm7, %%mm3 \n\t"
1923 "pmaddwd %%mm6, %%mm4 \n\t"
1924 "pmaddwd %%mm6, %%mm1 \n\t"
1925 "pmaddwd %%mm6, %%mm2 \n\t"
1926 "pmaddwd %%mm6, %%mm3 \n\t"
ac6a2e45 1927#ifndef FAST_BGR2YV12
2da0d70d
DB
1928 "psrad $8, %%mm4 \n\t"
1929 "psrad $8, %%mm1 \n\t"
1930 "psrad $8, %%mm2 \n\t"
1931 "psrad $8, %%mm3 \n\t"
1932#endif
1933 "packssdw %%mm1, %%mm4 \n\t"
1934 "packssdw %%mm3, %%mm2 \n\t"
1935 "pmaddwd %%mm5, %%mm4 \n\t"
1936 "pmaddwd %%mm5, %%mm2 \n\t"
1937 "add $24, %%"REG_d" \n\t"
1938 "packssdw %%mm2, %%mm4 \n\t"
1939 "psraw $7, %%mm4 \n\t"
1940
1941 "packuswb %%mm4, %%mm0 \n\t"
5802683a 1942 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
2da0d70d
DB
1943
1944 "movq %%mm0, (%1, %%"REG_a") \n\t"
1945 "add $8, %%"REG_a" \n\t"
1946 " js 1b \n\t"
1947 : : "r" (src+width*3), "r" (dst+width), "g" (-width)
1948 : "%"REG_a, "%"REG_d
1949 );
1e621b18 1950#else
2da0d70d
DB
1951 int i;
1952 for (i=0; i<width; i++)
1953 {
1954 int b= src[i*3+0];
1955 int g= src[i*3+1];
1956 int r= src[i*3+2];
1e621b18 1957
/* 33<<(RGB2YUV_SHIFT-1) is 16.5 in fixed point: offset 16 plus 0.5 for rounding. */
e5091488 1958 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 1959 }
bc279024 1960#endif /* HAVE_MMX */
1e621b18
MN
1961}
1962
/**
 * Computes one U and one V sample from every pair of adjacent 24-bit pixels.
 *
 * Reference (C) behaviour: the blue (byte 0), green (byte 1) and red (byte 2)
 * channels of the two pixels are summed and
 *   dstU[i] = (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1)
 *   dstV[i] = (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1).
 *
 * With HAVE_MMX four U and four V samples (eight source pixels, 24 bytes)
 * are produced per loop iteration. When MMX2 or 3DNow is available the two
 * pixels of a pair are averaged with PAVGB; otherwise they are added as
 * 16-bit words and halved with psrlw $1.
 *
 * @param dstU  output U plane, receives width bytes
 * @param dstV  output V plane, receives width bytes
 * @param src1  input, six bytes per output sample
 * @param src2  must equal src1 (asserted after the conversion); not read
 * @param width number of U (and V) samples to produce
 */
7f526efd 1963static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1e621b18 1964{
4342fc14 1965#ifdef HAVE_MMX
2da0d70d
DB
/* %0 = src1 + 6*width, %1 = dstU + width, %2 = dstV + width, %3 = -width.
   REG_a is the (negative) output index, REG_d = 6*REG_a the source byte offset. */
1966 asm volatile(
1967 "mov %3, %%"REG_a" \n\t"
5802683a
RD
1968 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
1969 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
2da0d70d
DB
1970 "pxor %%mm7, %%mm7 \n\t"
1971 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
1972 "add %%"REG_d", %%"REG_d" \n\t"
1973 ASMALIGN(4)
1974 "1: \n\t"
1975 PREFETCH" 64(%0, %%"REG_d") \n\t"
4342fc14 1976#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2da0d70d
DB
1977 "movq (%0, %%"REG_d"), %%mm0 \n\t"
1978 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
1979 "movq %%mm0, %%mm1 \n\t"
1980 "movq %%mm2, %%mm3 \n\t"
1981 "psrlq $24, %%mm0 \n\t"
1982 "psrlq $24, %%mm2 \n\t"
1983 PAVGB(%%mm1, %%mm0)
1984 PAVGB(%%mm3, %%mm2)
1985 "punpcklbw %%mm7, %%mm0 \n\t"
1986 "punpcklbw %%mm7, %%mm2 \n\t"
4342fc14 1987#else
2da0d70d
DB
1988 "movd (%0, %%"REG_d"), %%mm0 \n\t"
1989 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
1990 "punpcklbw %%mm7, %%mm0 \n\t"
1991 "punpcklbw %%mm7, %%mm2 \n\t"
1992 "paddw %%mm2, %%mm0 \n\t"
1993 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
1994 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
1995 "punpcklbw %%mm7, %%mm4 \n\t"
1996 "punpcklbw %%mm7, %%mm2 \n\t"
1997 "paddw %%mm4, %%mm2 \n\t"
1998 "psrlw $1, %%mm0 \n\t"
1999 "psrlw $1, %%mm2 \n\t"
2000#endif
5802683a
RD
2001 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
2002 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
2da0d70d
DB
2003
2004 "pmaddwd %%mm0, %%mm1 \n\t"
2005 "pmaddwd %%mm2, %%mm3 \n\t"
2006 "pmaddwd %%mm6, %%mm0 \n\t"
2007 "pmaddwd %%mm6, %%mm2 \n\t"
4342fc14 2008#ifndef FAST_BGR2YV12
2da0d70d
DB
2009 "psrad $8, %%mm0 \n\t"
2010 "psrad $8, %%mm1 \n\t"
2011 "psrad $8, %%mm2 \n\t"
2012 "psrad $8, %%mm3 \n\t"
2013#endif
2014 "packssdw %%mm2, %%mm0 \n\t"
2015 "packssdw %%mm3, %%mm1 \n\t"
2016 "pmaddwd %%mm5, %%mm0 \n\t"
2017 "pmaddwd %%mm5, %%mm1 \n\t"
2018 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
2019 "psraw $7, %%mm0 \n\t"
4342fc14
MN
2020
2021#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2da0d70d
DB
2022 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
2023 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
2024 "movq %%mm4, %%mm1 \n\t"
2025 "movq %%mm2, %%mm3 \n\t"
2026 "psrlq $24, %%mm4 \n\t"
2027 "psrlq $24, %%mm2 \n\t"
2028 PAVGB(%%mm1, %%mm4)
2029 PAVGB(%%mm3, %%mm2)
2030 "punpcklbw %%mm7, %%mm4 \n\t"
2031 "punpcklbw %%mm7, %%mm2 \n\t"
4342fc14 2032#else
2da0d70d
DB
2033 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
2034 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
2035 "punpcklbw %%mm7, %%mm4 \n\t"
2036 "punpcklbw %%mm7, %%mm2 \n\t"
2037 "paddw %%mm2, %%mm4 \n\t"
2038 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
2039 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
2040 "punpcklbw %%mm7, %%mm5 \n\t"
2041 "punpcklbw %%mm7, %%mm2 \n\t"
2042 "paddw %%mm5, %%mm2 \n\t"
5802683a 2043 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
2da0d70d
DB
/* Each register holds the sum of exactly two pixels, so halve it (shift by 1)
   just like the first group above; a shift by 2 would feed half the intended
   input into U2/U3 and V2/V3. */
2044 "psrlw $1, %%mm4 \n\t"
2045 "psrlw $1, %%mm2 \n\t"
2046#endif
5802683a
RD
2047 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
2048 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
2da0d70d
DB
2049
2050 "pmaddwd %%mm4, %%mm1 \n\t"
2051 "pmaddwd %%mm2, %%mm3 \n\t"
2052 "pmaddwd %%mm6, %%mm4 \n\t"
2053 "pmaddwd %%mm6, %%mm2 \n\t"
4342fc14 2054#ifndef FAST_BGR2YV12
2da0d70d
DB
2055 "psrad $8, %%mm4 \n\t"
2056 "psrad $8, %%mm1 \n\t"
2057 "psrad $8, %%mm2 \n\t"
2058 "psrad $8, %%mm3 \n\t"
2059#endif
2060 "packssdw %%mm2, %%mm4 \n\t"
2061 "packssdw %%mm3, %%mm1 \n\t"
2062 "pmaddwd %%mm5, %%mm4 \n\t"
2063 "pmaddwd %%mm5, %%mm1 \n\t"
2064 "add $24, %%"REG_d" \n\t"
2065 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
2066 "psraw $7, %%mm4 \n\t"
2067
2068 "movq %%mm0, %%mm1 \n\t"
2069 "punpckldq %%mm4, %%mm0 \n\t"
2070 "punpckhdq %%mm4, %%mm1 \n\t"
2071 "packsswb %%mm1, %%mm0 \n\t"
5802683a 2072 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
2da0d70d
DB
2073
2074 "movd %%mm0, (%1, %%"REG_a") \n\t"
2075 "punpckhdq %%mm0, %%mm0 \n\t"
2076 "movd %%mm0, (%2, %%"REG_a") \n\t"
2077 "add $4, %%"REG_a" \n\t"
2078 " js 1b \n\t"
2079 : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
2080 : "%"REG_a, "%"REG_d
2081 );
1e621b18 2082#else
2da0d70d
DB
2083 int i;
2084 for (i=0; i<width; i++)
2085 {
2086 int b= src1[6*i + 0] + src1[6*i + 3];
2087 int g= src1[6*i + 1] + src1[6*i + 4];
2088 int r= src1[6*i + 2] + src1[6*i + 5];
2089
35ab2b64
MN
/* Channel sums of two pixels, hence the shift by RGB2YUV_SHIFT+1; 257<<RGB2YUV_SHIFT is 128.5 after that shift. */
2090 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
2091 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
2da0d70d 2092 }
bc279024 2093#endif /* HAVE_MMX */
2da0d70d 2094 assert(src1 == src2);
1e621b18
MN
2095}
2096
97b93389 2097static inline void RENAME(rgb16ToY)(uint8_t *dst, uint8_t *src, long width)
6af250ea 2098{
2da0d70d
DB
2099 int i;
2100 for (i=0; i<width; i++)
2101 {
2102 int d= ((uint16_t*)src)[i];
2103 int b= d&0x1F;
2104 int g= (d>>5)&0x3F;
2105 int r= (d>>11)&0x1F;
2106
35ab2b64 2107 dst[i]= (2*RY*r + GY*g + 2*BY*b + (33<<(RGB2YUV_SHIFT-3)))>>(RGB2YUV_SHIFT-2);
2da0d70d 2108 }
6af250ea
MN
2109}
2110
97b93389 2111static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
6af250ea 2112{
2da0d70d
DB
2113 int i;
2114 assert(src1==src2);
2115 for (i=0; i<width; i++)
2116 {
2117 int d0= ((uint32_t*)src1)[i];
2118
2119 int dl= (d0&0x07E0F81F);
2120 int dh= ((d0>>5)&0x07C0F83F);
2121
2122 int dh2= (dh>>11) + (dh<<21);
2123 int d= dh2 + dl;
2124
2125 int b= d&0x7F;
2126 int r= (d>>11)&0x7F;
2127 int g= d>>21;
35ab2b64
MN
2128 dstU[i]= (2*RU*r + GU*g + 2*BU*b + (257<<(RGB2YUV_SHIFT-2)))>>(RGB2YUV_SHIFT+1-2);
2129 dstV[i]= (2*RV*r + GV*g + 2*BV*b + (257<<(RGB2YUV_SHIFT-2)))>>(RGB2YUV_SHIFT+1-2);
2da0d70d 2130 }
6af250ea
MN
2131}
2132
97b93389 2133static inline void RENAME(rgb15ToY)(uint8_t *dst, uint8_t *src, long width)
b72034dd 2134{
2da0d70d
DB
2135 int i;
2136 for (i=0; i<width; i++)
2137 {
2138 int d= ((uint16_t*)src)[i];
2139 int b= d&0x1F;
2140 int g= (d>>5)&0x1F;
2141 int r= (d>>10)&0x1F;
2142
35ab2b64 2143 dst[i]= (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-4)))>>(RGB2YUV_SHIFT-3);
2da0d70d 2144 }
b72034dd
MN
2145}
2146
97b93389 2147static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
b72034dd 2148{
2da0d70d
DB
2149 int i;
2150 assert(src1==src2);
2151 for (i=0; i<width; i++)
2152 {
2153 int d0= ((uint32_t*)src1)[i];
2154
2155 int dl= (d0&0x03E07C1F);
2156 int dh= ((d0>>5)&0x03E0F81F);
2157
2158 int dh2= (dh>>11) + (dh<<21);
2159 int d= dh2 + dl;
2160
2161 int b= d&0x7F;
2162 int r= (d>>10)&0x7F;
2163 int g= d>>21;
35ab2b64
MN
2164 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-3)))>>(RGB2YUV_SHIFT+1-3);
2165 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-3)))>>(RGB2YUV_SHIFT+1-3);
2da0d70d 2166 }
b72034dd
MN
2167}
2168
2169
97b93389 2170static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, long width)
a861d4d7 2171{
2da0d70d
DB
2172 int i;
2173 for (i=0; i<width; i++)
2174 {
2175 int r= ((uint32_t*)src)[i]&0xFF;
2176 int g= (((uint32_t*)src)[i]>>8)&0xFF;
2177 int b= (((uint32_t*)src)[i]>>16)&0xFF;
2178
e5091488 2179 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 2180 }
a861d4d7
MN
2181}
2182
97b93389 2183static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
a861d4d7 2184{
2da0d70d
DB
2185 int i;
2186 assert(src1==src2);
2187 for (i=0; i<width; i++)
2188 {
2189 const int a= ((uint32_t*)src1)[2*i+0];
2190 const int e= ((uint32_t*)src1)[2*i+1];
2191 const int l= (a&0xFF00FF) + (e&0xFF00FF);
2192 const int h= (a&0x00FF00) + (e&0x00FF00);
2193 const int r= l&0x3FF;
2194 const int g= h>>8;
2195 const int b= l>>16;
2196
35ab2b64
MN
2197 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT)))>>(RGB2YUV_SHIFT+1);
2198 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT)))>>(RGB2YUV_SHIFT+1);
2da0d70d 2199 }
a861d4d7
MN
2200}
2201
97b93389 2202static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width)
a861d4d7 2203{
2da0d70d
DB
2204 int i;
2205 for (i=0; i<width; i++)
2206 {
2207 int r= src[i*3+0];
2208 int g= src[i*3+1];
2209 int b= src[i*3+2];
2210
e5091488 2211 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2da0d70d 2212 }
a861d4d7
MN
2213}
2214
97b93389 2215static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
a861d4d7 2216{
2da0d70d
DB
2217 int i;
2218 assert(src1==src2);
2219 for (i=0; i<width; i++)
2220 {
2221 int r= src1[6*i + 0] + src1[6*i + 3];
2222 int g= src1[6*i + 1] + src1[6*i + 4];
2223 int b= src1[6*i + 2] + src1[6*i + 5];
2224
35ab2b64
MN
2225 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT)))>>(RGB2YUV_SHIFT+1);
2226 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT)))>>(RGB2YUV_SHIFT+1);
2da0d70d 2227 }
a861d4d7
MN
2228}
2229
97b93389 2230static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, long width)
a43fb6b3 2231{
2da0d70d
DB
2232 int i;
2233 for (i=0; i<width; i++)
2234 {
2235 int d= ((uint16_t*)src)[i];
2236 int r= d&0x1F;
2237 int g= (d>>5)&0x3F;
2238 int b= (d>>11)&0x1F;
2239
35ab2b64 2240 dst[i]= (2*RY*r + GY*g + 2*BY*b + (33<<(RGB2YUV_SHIFT-3)))>>(RGB2YUV_SHIFT-2);
2da0d70d 2241 }
a43fb6b3
LA
2242}
2243
97b93389 2244static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
a43fb6b3 2245{
2da0d70d
DB
2246 int i;
2247 assert(src1 == src2);
2248 for (i=0; i<width; i++)
2249 {
2250 int d0= ((uint32_t*)src1)[i];
2251
2252 int dl= (d0&0x07E0F81F);
fa884294 2253 int d= dl + (((d0>>16) + (d0<<16))&0x07E0F81F);
2da0d70d 2254
fa884294
IP
2255 int r= d&0x3F;
2256 int b= (d>>11)&0x3F;
2da0d70d 2257 int g= d>>21;
35ab2b64
MN
2258 dstU[i]= (2*RU*r + GU*g + 2*BU*b + (257<<(RGB2YUV_SHIFT-2)))>>(RGB2YUV_SHIFT+1-2);
2259 dstV[i]= (2*RV*r + GV*g + 2*BV*b + (257<<(RGB2YUV_SHIFT-2)))>>(RGB2YUV_SHIFT+1-2);
2da0d70d 2260 }
a43fb6b3
LA
2261}
2262
97b93389 2263static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, long width)
a43fb6b3 2264{
2da0d70d
DB
2265 int i;
2266 for (i=0; i<width; i++)
2267 {
2268 int d= ((uint16_t*)src)[i];
2269 int r= d&0x1F;
2270 int g= (d>>5)&0x1F;
2271 int b= (d>>10)&0x1F;
2272
35ab2b64 2273 dst[i]= (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-4)))>>(RGB2YUV_SHIFT-3);
2da0d70d 2274 }
a43fb6b3
LA
2275}
2276
97b93389 2277static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
a43fb6b3 2278{
2da0d70d
DB
2279 int i;
2280 assert(src1 == src2);
2281 for (i=0; i<width; i++)
2282 {
2283 int d0= ((uint32_t*)src1)[i];
2284
2285 int dl= (d0&0x03E07C1F);
f96829d2 2286 int d= dl + (((d0>>16) + (d0<<16))&0x03E07C1F);
2da0d70d 2287
f96829d2
IP
2288 int r= d&0x3F;
2289 int b= (d>>10)&0x3F;
2290 int g= d>>21;
35ab2b64
MN
2291 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-3)))>>(RGB2YUV_SHIFT+1-3);
2292 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-3)))>>(RGB2YUV_SHIFT+1-3);
2da0d70d 2293 }
a43fb6b3 2294}
1e621b18 2295
97b93389 2296static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal)
e28630fc 2297{
2da0d70d
DB
2298 int i;
2299 for (i=0; i<width; i++)
2300 {
2301 int d= src[i];
e28630fc 2302
2da0d70d
DB
2303 dst[i]= pal[d] & 0xFF;
2304 }
e28630fc
MN
2305}
2306
97b93389 2307static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *pal)
e28630fc 2308{
2da0d70d
DB
2309 int i;
2310 assert(src1 == src2);
2311 for (i=0; i<width; i++)
2312 {
2313 int p= pal[src1[i]];
2314
2315 dstU[i]= p>>8;
2316 dstV[i]= p>>16;
2317 }
e28630fc
MN
2318}
2319
8a322796 2320// bilinear / bicubic scaling
/**
 * Horizontally filters one line of 8-bit samples into 16-bit intermediates.
 *
 * Reference (C) behaviour, for every output index i < dstW:
 *   val    = sum over j < filterSize of src[filterPos[i] + j] * filter[filterSize*i + j]
 *   dst[i] = av_clip(val >> 7, 0, (1<<15)-1)
 *
 * With HAVE_MMX one of three inline-asm variants is used (filterSize == 4,
 * filterSize == 8, or a generic inner loop that consumes 4 taps per step);
 * filterSize must be a positive multiple of 4 there (asserted). Each variant
 * produces two output samples per outer-loop iteration.
 * Without MMX but with HAVE_ALTIVEC the call is forwarded to
 * hScale_altivec_real().
 *
 * @param dst        output samples
 * @param dstW       number of output samples
 * @param src        input samples
 * @param srcW       input width; only forwarded to the AltiVec implementation
 * @param xInc       only forwarded to the AltiVec implementation
 * @param filter     coefficients, filterSize consecutive entries per output sample
 * @param filterPos  first source index used for each output sample
 * @param filterSize number of taps per output sample
 */
077ea8a7 2321static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
2da0d70d 2322 int16_t *filter, int16_t *filterPos, long filterSize)
2ff198c1 2323{
077ea8a7 2324#ifdef HAVE_MMX
2da0d70d
DB
2325 assert(filterSize % 4 == 0 && filterSize>0);
2326 if (filterSize==4) // Always true for upscaling, sometimes for down, too.
2327 {
/* counter is a negative byte offset into dst/filterPos (2 bytes per sample);
   the base pointers are advanced to their ends so the loop can count up to 0. */
2328 long counter= -2*dstW;
2329 filter-= counter*2;
2330 filterPos-= counter/2;
2331 dst-= counter/2;
2332 asm volatile(
83c89c78 2333#if defined(PIC)
2da0d70d
DB
2334 "push %%"REG_b" \n\t"
2335#endif
2336 "pxor %%mm7, %%mm7 \n\t"
2337 "movq "MANGLE(w02)", %%mm6 \n\t"
2338 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2339 "mov %%"REG_a", %%"REG_BP" \n\t"
2340 ASMALIGN(4)
2341 "1: \n\t"
2342 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2343 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
2344 "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
2345 "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
2346 "movd (%3, %%"REG_a"), %%mm0 \n\t"
2347 "movd (%3, %%"REG_b"), %%mm2 \n\t"
2348 "punpcklbw %%mm7, %%mm0 \n\t"
2349 "punpcklbw %%mm7, %%mm2 \n\t"
2350 "pmaddwd %%mm1, %%mm0 \n\t"
2351 "pmaddwd %%mm2, %%mm3 \n\t"
2352 "psrad $8, %%mm0 \n\t"
2353 "psrad $8, %%mm3 \n\t"
2354 "packssdw %%mm3, %%mm0 \n\t"
2355 "pmaddwd %%mm6, %%mm0 \n\t"
2356 "packssdw %%mm0, %%mm0 \n\t"
2357 "movd %%mm0, (%4, %%"REG_BP") \n\t"
2358 "add $4, %%"REG_BP" \n\t"
2359 " jnc 1b \n\t"
2360
2361 "pop %%"REG_BP" \n\t"
83c89c78 2362#if defined(PIC)
2da0d70d 2363 "pop %%"REG_b" \n\t"
83c89c78 2364#endif
2da0d70d
DB
2365 : "+a" (counter)
2366 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
83c89c78 2367#if !defined(PIC)
2da0d70d
DB
2368 : "%"REG_b
2369#endif
2370 );
2371 }
2372 else if (filterSize==8)
2373 {
/* Same scheme as the 4-tap case, with two 4-tap halves accumulated per output. */
2374 long counter= -2*dstW;
2375 filter-= counter*4;
2376 filterPos-= counter/2;
2377 dst-= counter/2;
2378 asm volatile(
83c89c78 2379#if defined(PIC)
2da0d70d
DB
2380 "push %%"REG_b" \n\t"
2381#endif
2382 "pxor %%mm7, %%mm7 \n\t"
2383 "movq "MANGLE(w02)", %%mm6 \n\t"
2384 "push %%"REG_BP" \n\t" // we use 7 regs here ...
2385 "mov %%"REG_a", %%"REG_BP" \n\t"
2386 ASMALIGN(4)
2387 "1: \n\t"
2388 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
2389 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
2390 "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
2391 "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
2392 "movd (%3, %%"REG_a"), %%mm0 \n\t"
2393 "movd (%3, %%"REG_b"), %%mm2 \n\t"
2394 "punpcklbw %%mm7, %%mm0 \n\t"
2395 "punpcklbw %%mm7, %%mm2 \n\t"
2396 "pmaddwd %%mm1, %%mm0 \n\t"
2397 "pmaddwd %%mm2, %%mm3 \n\t"
2398
2399 "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
2400 "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
2401 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
2402 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
2403 "punpcklbw %%mm7, %%mm4 \n\t"
2404 "punpcklbw %%mm7, %%mm2 \n\t"
2405 "pmaddwd %%mm1, %%mm4 \n\t"
2406 "pmaddwd %%mm2, %%mm5 \n\t"
2407 "paddd %%mm4, %%mm0 \n\t"
2408 "paddd %%mm5, %%mm3 \n\t"
2409
2410 "psrad $8, %%mm0 \n\t"
2411 "psrad $8, %%mm3 \n\t"
2412 "packssdw %%mm3, %%mm0 \n\t"
2413 "pmaddwd %%mm6, %%mm0 \n\t"
2414 "packssdw %%mm0, %%mm0 \n\t"
2415 "movd %%mm0, (%4, %%"REG_BP") \n\t"
2416 "add $4, %%"REG_BP" \n\t"
2417 " jnc 1b \n\t"
2418
2419 "pop %%"REG_BP" \n\t"
83c89c78 2420#if defined(PIC)
2da0d70d 2421 "pop %%"REG_b" \n\t"
83c89c78 2422#endif
2da0d70d
DB
2423 : "+a" (counter)
2424 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
83c89c78 2425#if !defined(PIC)
2da0d70d
DB
2426 : "%"REG_b
2427#endif
2428 );
2429 }
2430 else
2431 {
/* Generic tap count: the inner loop (label 2) walks REG_c from src up to
   offset = src + filterSize in steps of 4 taps, while the filter pointer (%1)
   is advanced directly instead of being indexed by the counter. */
2432 uint8_t *offset = src+filterSize;
2433 long counter= -2*dstW;
2434 //filter-= counter*filterSize/2;
2435 filterPos-= counter/2;
2436 dst-= counter/2;
2437 asm volatile(
2438 "pxor %%mm7, %%mm7 \n\t"
2439 "movq "MANGLE(w02)", %%mm6 \n\t"
2440 ASMALIGN(4)
2441 "1: \n\t"
2442 "mov %2, %%"REG_c" \n\t"
2443 "movzwl (%%"REG_c", %0), %%eax \n\t"
2444 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
2445 "mov %5, %%"REG_c" \n\t"
2446 "pxor %%mm4, %%mm4 \n\t"
2447 "pxor %%mm5, %%mm5 \n\t"
2448 "2: \n\t"
2449 "movq (%1), %%mm1 \n\t"
2450 "movq (%1, %6), %%mm3 \n\t"
2451 "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
2452 "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
2453 "punpcklbw %%mm7, %%mm0 \n\t"
2454 "punpcklbw %%mm7, %%mm2 \n\t"
2455 "pmaddwd %%mm1, %%mm0 \n\t"
2456 "pmaddwd %%mm2, %%mm3 \n\t"
2457 "paddd %%mm3, %%mm5 \n\t"
2458 "paddd %%mm0, %%mm4 \n\t"
2459 "add $8, %1 \n\t"
2460 "add $4, %%"REG_c" \n\t"
2461 "cmp %4, %%"REG_c" \n\t"
2462 " jb 2b \n\t"
2463 "add %6, %1 \n\t"
2464 "psrad $8, %%mm4 \n\t"
2465 "psrad $8, %%mm5 \n\t"
2466 "packssdw %%mm5, %%mm4 \n\t"
2467 "pmaddwd %%mm6, %%mm4 \n\t"
2468 "packssdw %%mm4, %%mm4 \n\t"
2469 "mov %3, %%"REG_a" \n\t"
2470 "movd %%mm4, (%%"REG_a", %0) \n\t"
2471 "add $4, %0 \n\t"
2472 " jnc 1b \n\t"
2473
2474 : "+r" (counter), "+r" (filter)
2475 : "m" (filterPos), "m" (dst), "m"(offset),
2476 "m" (src), "r" (filterSize*2)
2477 : "%"REG_a, "%"REG_c, "%"REG_d
2478 );
2479 }
077ea8a7 2480#else
8c266f0c 2481#ifdef HAVE_ALTIVEC
2da0d70d 2482 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
8c266f0c 2483#else
2da0d70d
DB
/* Portable reference implementation. */
2484 int i;
2485 for (i=0; i<dstW; i++)
2486 {
2487 int j;
2488 int srcPos= filterPos[i];
2489 int val=0;
2490 //printf("filterPos: %d\n", filterPos[i]);
2491 for (j=0; j<filterSize; j++)
2492 {
2493 //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
2494 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2495 }
2496 //filter += hFilterSize;
2497 dst[i] = av_clip(val>>7, 0, (1<<15)-1); // the cubic equation does overflow ...
2498 //dst[i] = val>>7;
2499 }
bc279024
DB
2500#endif /* HAVE_ALTIVEC */
2501#endif /* HAVE_MMX */
077ea8a7 2502}
2ff198c1 2503 // *** horizontal scale Y line to temp buffer
6bc0c792 2504static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
2da0d70d
DB
2505 int flags, int canMMX2BeUsed, int16_t *hLumFilter,
2506 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
2507 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
2508 int32_t *mmx2FilterPos, uint8_t *pal)
077ea8a7 2509{
2da0d70d 2510 if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
1e621b18 2511 {
2da0d70d
DB
2512 RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
2513 src= formatConvBuffer;
1e621b18 2514 }
2da0d70d 2515 else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
7322a67c 2516 {
2da0d70d
DB
2517 RENAME(uyvyToY)(formatConvBuffer, src, srcW);
2518 src= formatConvBuffer;
7322a67c 2519 }
2da0d70d 2520 else if (srcFormat==PIX_FMT_RGB32)
1e621b18 2521 {
2da0d70d
DB
2522 RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
2523 src= formatConvBuffer;
1e621b18 2524 }
9990e426
MN
2525 else if (srcFormat==PIX_FMT_RGB32_1)
2526 {
2527 RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW);
2528 src= formatConvBuffer;
2529 }
2da0d70d 2530 else if (srcFormat==PIX_FMT_BGR24)
1e621b18 2531 {
2da0d70d
DB
2532 RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
2533 src= formatConvBuffer;
1e621b18 2534 }
2da0d70d 2535 else if (srcFormat==PIX_FMT_BGR565)
6af250ea 2536 {
2da0d70d
DB
2537 RENAME(bgr16ToY)(formatConvBuffer, src, srcW);
2538 src= formatConvBuffer;
6af250ea 2539 }
2da0d70d 2540 else if (srcFormat==PIX_FMT_BGR555)
b72034dd 2541 {
2da0d70d
DB
2542 RENAME(bgr15ToY)(formatConvBuffer, src, srcW);
2543 src= formatConvBuffer;
b72034dd 2544 }
2da0d70d 2545 else if (srcFormat==PIX_FMT_BGR32)
a861d4d7 2546 {
2da0d70d
DB
2547 RENAME(rgb32ToY)(formatConvBuffer, src, srcW);
2548 src= formatConvBuffer;
a861d4d7 2549 }
9990e426
MN
2550 else if (srcFormat==PIX_FMT_BGR32_1)
2551 {
2552 RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW);
2553 src= formatConvBuffer;
2554 }
2da0d70d 2555 else if (srcFormat==PIX_FMT_RGB24)
a861d4d7 2556 {
2da0d70d
DB
2557 RENAME(rgb24ToY)(formatConvBuffer, src, srcW);
2558 src= formatConvBuffer;
a861d4d7 2559 }
2da0d70d 2560 else if (srcFormat==PIX_FMT_RGB565)
a43fb6b3 2561 {
2da0d70d
DB
2562 RENAME(rgb16ToY)(formatConvBuffer, src, srcW);
2563 src= formatConvBuffer;
a43fb6b3 2564 }
2da0d70d 2565 else if (srcFormat==PIX_FMT_RGB555)
a43fb6b3 2566 {
2da0d70d
DB
2567 RENAME(rgb15ToY)(formatConvBuffer, src, srcW);
2568 src= formatConvBuffer;
a43fb6b3 2569 }
2da0d70d 2570 else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
e28630fc 2571 {
87cf861c 2572 RENAME(palToY)(formatConvBuffer, src, srcW, (uint32_t*)pal);
2da0d70d 2573 src= formatConvBuffer;
e28630fc 2574 }
1e621b18 2575
e3d2500f 2576#ifdef HAVE_MMX
8a322796 2577 // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
2da0d70d 2578 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
e3d2500f 2579#else
2da0d70d 2580 if (!(flags&SWS_FAST_BILINEAR))
e3d2500f 2581#endif
077ea8a7 2582 {
2da0d70d 2583 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
077ea8a7 2584 }
8a322796 2585 else // fast bilinear upscale / crap downscale
077ea8a7 2586 {
3d6a30d9 2587#if defined(ARCH_X86)
2ff198c1 2588#ifdef HAVE_MMX2
2da0d70d 2589 int i;
83c89c78 2590#if defined(PIC)
2da0d70d 2591 uint64_t ebxsave __attribute__((aligned(8)));
83c89c78 2592#endif
2da0d70d
DB
2593 if (canMMX2BeUsed)
2594 {
2595 asm volatile(
83c89c78 2596#if defined(PIC)
2da0d70d
DB
2597 "mov %%"REG_b", %5 \n\t"
2598#endif
2599 "pxor %%mm7, %%mm7 \n\t"
2600 "mov %0, %%"REG_c" \n\t"
2601 "mov %1, %%"REG_D" \n\t"
2602 "mov %2, %%"REG_d" \n\t"
2603 "mov %3, %%"REG_b" \n\t"
2604 "xor %%"REG_a", %%"REG_a" \n\t" // i
2605 PREFETCH" (%%"REG_c") \n\t"
2606 PREFETCH" 32(%%"REG_c") \n\t"
2607 PREFETCH" 64(%%"REG_c") \n\t"
99cefd0b 2608
6d606c4f
AJ
2609#ifdef ARCH_X86_64
2610
2611#define FUNNY_Y_CODE \
2da0d70d
DB
2612 "movl (%%"REG_b"), %%esi \n\t"\
2613 "call *%4 \n\t"\
2614 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
2615 "add %%"REG_S", %%"REG_c" \n\t"\
2616 "add %%"REG_a", %%"REG_D" \n\t"\
2617 "xor %%"REG_a", %%"REG_a" \n\t"\
6d606c4f
AJ
2618
2619#else
2620
2ff198c1 2621#define FUNNY_Y_CODE \
2da0d70d
DB
2622 "movl (%%"REG_b"), %%esi \n\t"\
2623 "call *%4 \n\t"\
2624 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
2625 "add %%"REG_a", %%"REG_D" \n\t"\
2626 "xor %%"REG_a", %%"REG_a" \n\t"\
99cefd0b 2627
bc279024 2628#endif /* ARCH_X86_64 */
6d606c4f 2629
2ff198c1
MN
2630FUNNY_Y_CODE
2631FUNNY_Y_CODE
2632FUNNY_Y_CODE
2633FUNNY_Y_CODE
2634FUNNY_Y_CODE
2635FUNNY_Y_CODE
2636FUNNY_Y_CODE
2637FUNNY_Y_CODE
2638
83c89c78 2639#if defined(PIC)
2da0d70d 2640 "mov %5, %%"REG_b" \n\t"
83c89c78 2641#endif
2da0d70d
DB
2642 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
2643 "m" (funnyYCode)
83c89c78 2644#if defined(PIC)
2da0d70d 2645 ,"m" (ebxsave)
83c89c78 2646#endif
2da0d70d 2647 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
83c89c78 2648#if !defined(PIC)
2da0d70d
DB
2649 ,"%"REG_b
2650#endif
2651 );
2652 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
2653 }
2654 else
2655 {
bc279024 2656#endif /* HAVE_MMX2 */
2da0d70d
DB
2657 long xInc_shr16 = xInc >> 16;
2658 uint16_t xInc_mask = xInc & 0xffff;
2659 //NO MMX just normal asm ...
2660 asm volatile(
2661 "xor %%"REG_a", %%"REG_a" \n\t" // i
2662 "xor %%"REG_d", %%"REG_d" \n\t" // xx
2663 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
2664 ASMALIGN(4)
2665 "1: \n\t"
2666 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
2667 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
2668 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2669 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2670 "shll $16, %%edi \n\t"
2671 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2672 "mov %1, %%"REG_D" \n\t"
2673 "shrl $9, %%esi \n\t"
2674 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
2675 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
2676 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
2677
2678 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
2679 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
2680 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2681 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2682 "shll $16, %%edi \n\t"
2683 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2684 "mov %1, %%"REG_D" \n\t"
2685 "shrl $9, %%esi \n\t"
2686 "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
2687 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
2688 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
2689
2690
2691 "add $2, %%"REG_a" \n\t"
2692 "cmp %2, %%"REG_a" \n\t"
2693 " jb 1b \n\t"
2694
2695
2696 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
2697 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
2698 );
2ff198c1 2699#ifdef HAVE_MMX2
2da0d70d 2700 } //if MMX2 can't be used
2ff198c1
MN
2701#endif
2702#else
2da0d70d
DB
2703 int i;
2704 unsigned int xpos=0;
2705 for (i=0;i<dstWidth;i++)
2706 {
2707 register unsigned int xx=xpos>>16;
2708 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2709 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2710 xpos+=xInc;
2711 }
bc279024 2712#endif /* defined(ARCH_X86) */
077ea8a7 2713 }
6bc0c792
MN
2714
2715 if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
2716 int i;
2717 //FIXME all pal and rgb srcFormats could do this conversion as well
2718 //FIXME all scalers more complex than bilinear could do half of this transform
2719 if(c->srcRange){
2720 for (i=0; i<dstWidth; i++)
2721 dst[i]= (dst[i]*14071 + 33561947)>>14;
2722 }else{
2723 for (i=0; i<dstWidth; i++)
aa13b0fc 2724 dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
6bc0c792
MN
2725 }
2726 }
2ff198c1
MN
2727}
2728
/**
 * Horizontally scale one input line of each of the two chroma (U/V) planes.
 *
 * Steps, in the order they appear below:
 *  1. If srcFormat is one of the packed YUV, RGB/BGR or palettized formats
 *     tested below, the matching RENAME(*ToUV) helper converts src1/src2
 *     into formatConvBuffer (first plane at offset 0, second plane at offset
 *     VOFW) and src1/src2 are redirected to that buffer. Gray sources return
 *     immediately without touching dst. Any other format is scaled directly
 *     from src1/src2.
 *  2. The line is scaled either with RENAME(hScale) (filter based) or with
 *     one of the fast bilinear paths (MMX2 generated code, plain x86 asm, or
 *     the portable C loop), selected by flags, canMMX2BeUsed and the
 *     ARCH_X86 / HAVE_MMX / HAVE_MMX2 build macros.
 *  3. If c->srcRange differs from c->dstRange and c->dstFormat is neither RGB
 *     nor BGR, every output sample is rescaled with a fixed-point formula.
 *
 * @param c                context; only srcRange, dstRange and dstFormat are read here
 * @param dst              output; first plane at dst[0..dstWidth-1], second
 *                         plane at dst[VOFW..VOFW+dstWidth-1]
 * @param dstWidth         number of output samples per plane
 * @param src1             input line of the first plane
 * @param src2             input line of the second plane
 * @param srcW             input width handed to the converters and to hScale;
 *                         also used for the right-edge fixup of the MMX2 path
 * @param xInc             source step per output sample in 16.16 fixed point
 *                         (integer part xpos>>16, fraction xpos&0xFFFF)
 * @param flags            only SWS_FAST_BILINEAR is tested here
 * @param canMMX2BeUsed    non-zero selects the MMX2 generated-code path
 * @param hChrFilter       filter coefficients, forwarded to hScale
 * @param hChrFilterPos    filter positions, forwarded to hScale
 * @param hChrFilterSize   filter size, forwarded to hScale
 * @param funnyUVCode      address called indirectly ("call *%4") by the MMX2 path
 * @param srcFormat        source pixel format, selects the pre-conversion
 * @param formatConvBuffer scratch buffer for the pre-conversion
 * @param mmx2Filter       loaded into REG_d before the funnyUVCode calls
 * @param mmx2FilterPos    loaded into REG_b before the funnyUVCode calls
 * @param pal              palette, passed to palToUV as uint32_t*
 */
6bc0c792 2729inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
2da0d70d
DB
2730 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
2731 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
2732 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
2733 int32_t *mmx2FilterPos, uint8_t *pal)
2ff198c1 2734{
// Step 1: optional conversion into formatConvBuffer (plane 1 at +0, plane 2 at +VOFW).
// NOTE(review): PIX_FMT_RGB32 is routed to bgr32ToUV and PIX_FMT_BGR32 to rgb32ToUV;
// presumably the helper names follow the opposite byte-order convention - confirm.
2da0d70d 2735 if (srcFormat==PIX_FMT_YUYV422)
1e621b18 2736 {
8b2fce0d 2737 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2738 src1= formatConvBuffer;
8b2fce0d 2739 src2= formatConvBuffer+VOFW;
1e621b18 2740 }
2da0d70d 2741 else if (srcFormat==PIX_FMT_UYVY422)
7322a67c 2742 {
8b2fce0d 2743 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2744 src1= formatConvBuffer;
8b2fce0d 2745 src2= formatConvBuffer+VOFW;
7322a67c 2746 }
2da0d70d 2747 else if (srcFormat==PIX_FMT_RGB32)
1e621b18 2748 {
8b2fce0d 2749 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2750 src1= formatConvBuffer;
8b2fce0d 2751 src2= formatConvBuffer+VOFW;
1e621b18 2752 }
9990e426
MN
// Same converter as PIX_FMT_RGB32, with both source pointers advanced by ALT32_CORR.
2753 else if (srcFormat==PIX_FMT_RGB32_1)
2754 {
2755 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW);
2756 src1= formatConvBuffer;
2757 src2= formatConvBuffer+VOFW;
2758 }
2da0d70d 2759 else if (srcFormat==PIX_FMT_BGR24)
1e621b18 2760 {
8b2fce0d 2761 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2762 src1= formatConvBuffer;
8b2fce0d 2763 src2= formatConvBuffer+VOFW;
1e621b18 2764 }
2da0d70d 2765 else if (srcFormat==PIX_FMT_BGR565)
6af250ea 2766 {
8b2fce0d 2767 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2768 src1= formatConvBuffer;
8b2fce0d 2769 src2= formatConvBuffer+VOFW;
6af250ea 2770 }
2da0d70d 2771 else if (srcFormat==PIX_FMT_BGR555)
b72034dd 2772 {
8b2fce0d 2773 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2774 src1= formatConvBuffer;
8b2fce0d 2775 src2= formatConvBuffer+VOFW;
b72034dd 2776 }
2da0d70d 2777 else if (srcFormat==PIX_FMT_BGR32)
a861d4d7 2778 {
8b2fce0d 2779 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2780 src1= formatConvBuffer;
8b2fce0d 2781 src2= formatConvBuffer+VOFW;
a861d4d7 2782 }
9990e426
MN
// Same converter as PIX_FMT_BGR32, with both source pointers advanced by ALT32_CORR.
2783 else if (srcFormat==PIX_FMT_BGR32_1)
2784 {
2785 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW);
2786 src1= formatConvBuffer;
2787 src2= formatConvBuffer+VOFW;
2788 }
2da0d70d 2789 else if (srcFormat==PIX_FMT_RGB24)
a861d4d7 2790 {
8b2fce0d 2791 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2792 src1= formatConvBuffer;
8b2fce0d 2793 src2= formatConvBuffer+VOFW;
a861d4d7 2794 }
2da0d70d 2795 else if (srcFormat==PIX_FMT_RGB565)
a43fb6b3 2796 {
8b2fce0d 2797 RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2798 src1= formatConvBuffer;
8b2fce0d 2799 src2= formatConvBuffer+VOFW;
a43fb6b3 2800 }
2da0d70d 2801 else if (srcFormat==PIX_FMT_RGB555)
a43fb6b3 2802 {
8b2fce0d 2803 RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW);
2da0d70d 2804 src1= formatConvBuffer;
8b2fce0d 2805 src2= formatConvBuffer+VOFW;
a43fb6b3 2806 }
// Gray input: return without writing anything to dst.
2da0d70d 2807 else if (isGray(srcFormat))
6ff0ad6b 2808 {
2da0d70d 2809 return;
6ff0ad6b 2810 }
// Palettized / 8-bit formats: the palette is handed to the converter as uint32_t entries.
2da0d70d 2811 else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
e28630fc 2812 {
87cf861c 2813 RENAME(palToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, (uint32_t*)pal);
2da0d70d 2814 src1= formatConvBuffer;
8b2fce0d 2815 src2= formatConvBuffer+VOFW;
e28630fc 2816 }
1e621b18 2817
// Step 2: choose between the filter-based scaler and the fast bilinear paths.
e3d2500f 2818#ifdef HAVE_MMX
8a322796 2819 // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
2da0d70d 2820 if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
e3d2500f 2821#else
2da0d70d 2822 if (!(flags&SWS_FAST_BILINEAR))
e3d2500f 2823#endif
077ea8a7 2824 {
// Filter-based scaler, run once per plane; the second plane lands at dst+VOFW.
2da0d70d 2825 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
8b2fce0d 2826 RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
077ea8a7 2827 }
8a322796 2828 else // fast bilinear upscale / crap downscale
077ea8a7 2829 {
3d6a30d9 2830#if defined(ARCH_X86)
2ff198c1 2831#ifdef HAVE_MMX2
2da0d70d 2832 int i;
83c89c78 2833#if defined(PIC)
// Under PIC, REG_b is saved here and restored by the asm instead of being declared as a clobber.
2da0d70d 2834 uint64_t ebxsave __attribute__((aligned(8)));
83c89c78 2835#endif
2da0d70d
DB
2836 if (canMMX2BeUsed)
2837 {
// Register setup done by the asm below: REG_c = src1 (%0), REG_D = dst (%1),
// REG_d = mmx2Filter (%2), REG_b = mmx2FilterPos (%3), REG_a = 0.
// Each FUNNY_UV_CODE expansion calls funnyUVCode (%4) and then advances
// REG_c and REG_D and clears REG_a. Four expansions are emitted per plane.
2838 asm volatile(
83c89c78 2839#if defined(PIC)
2da0d70d
DB
2840 "mov %%"REG_b", %6 \n\t"
2841#endif
2842 "pxor %%mm7, %%mm7 \n\t"
2843 "mov %0, %%"REG_c" \n\t"
2844 "mov %1, %%"REG_D" \n\t"
2845 "mov %2, %%"REG_d" \n\t"
2846 "mov %3, %%"REG_b" \n\t"
2847 "xor %%"REG_a", %%"REG_a" \n\t" // i
2848 PREFETCH" (%%"REG_c") \n\t"
2849 PREFETCH" 32(%%"REG_c") \n\t"
2850 PREFETCH" 64(%%"REG_c") \n\t"
b7dc6f66 2851
6d606c4f
AJ
2852#ifdef ARCH_X86_64
2853
2854#define FUNNY_UV_CODE \
2da0d70d
DB
2855 "movl (%%"REG_b"), %%esi \n\t"\
2856 "call *%4 \n\t"\
2857 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
2858 "add %%"REG_S", %%"REG_c" \n\t"\
2859 "add %%"REG_a", %%"REG_D" \n\t"\
2860 "xor %%"REG_a", %%"REG_a" \n\t"\
6d606c4f
AJ
2861
2862#else
2863
b7dc6f66 2864#define FUNNY_UV_CODE \
2da0d70d
DB
2865 "movl (%%"REG_b"), %%esi \n\t"\
2866 "call *%4 \n\t"\
2867 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
2868 "add %%"REG_a", %%"REG_D" \n\t"\
2869 "xor %%"REG_a", %%"REG_a" \n\t"\
b7dc6f66 2870
bc279024 2871#endif /* ARCH_X86_64 */
6d606c4f 2872
b7dc6f66
MN
2873FUNNY_UV_CODE
2874FUNNY_UV_CODE
2875FUNNY_UV_CODE
2876FUNNY_UV_CODE
2da0d70d
DB
// Second plane: reload the source from src2 (%5) and the destination from dst (%1) plus VOF.
2877 "xor %%"REG_a", %%"REG_a" \n\t" // i
2878 "mov %5, %%"REG_c" \n\t" // src
2879 "mov %1, %%"REG_D" \n\t" // buf1
8b2fce0d 2880 "add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t"
2da0d70d
DB
2881 PREFETCH" (%%"REG_c") \n\t"
2882 PREFETCH" 32(%%"REG_c") \n\t"
2883 PREFETCH" 64(%%"REG_c") \n\t"
b7dc6f66
MN
2884
2885FUNNY_UV_CODE
2886FUNNY_UV_CODE
2887FUNNY_UV_CODE
2888FUNNY_UV_CODE
2889
83c89c78 2890#if defined(PIC)
2da0d70d 2891 "mov %6, %%"REG_b" \n\t"
83c89c78 2892#endif
2da0d70d
DB
2893 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
2894 "m" (funnyUVCode), "m" (src2)
83c89c78 2895#if defined(PIC)
2da0d70d 2896 ,"m" (ebxsave)
83c89c78 2897#endif
2da0d70d 2898 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
83c89c78 2899#if !defined(PIC)
2da0d70d
DB
2900 ,"%"REG_b
2901#endif
2902 );
// Right-edge fixup: walking back from the last output sample, every sample whose
// source position (i*xInc)>>16 is at or beyond srcW-1 is replaced by the last
// input sample scaled by 128.
2903 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2904 {
2905 //printf("%d %d %d\n", dstWidth, i, srcW);
2906 dst[i] = src1[srcW-1]*128;
8b2fce0d 2907 dst[i+VOFW] = src2[srcW-1]*128;
2da0d70d
DB
2908 }
2909 }
2910 else
2911 {
bc279024 2912#endif /* HAVE_MMX2 */
2da0d70d
DB
// Plain x86 asm path. xInc is split into its integer part (added with carry to the
// source index in REG_d) and its 16-bit fraction (accumulated in cx).
// REG_a is the output index; one sample per plane is produced per iteration.
2913 long xInc_shr16 = (long) (xInc >> 16);
2914 uint16_t xInc_mask = xInc & 0xffff;
2915 asm volatile(
2916 "xor %%"REG_a", %%"REG_a" \n\t" // i
2917 "xor %%"REG_d", %%"REG_d" \n\t" // xx
2918 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
2919 ASMALIGN(4)
2920 "1: \n\t"
// First plane: read from src1 (%0), store to dst (%1) at index REG_a.
2921 "mov %0, %%"REG_S" \n\t"
2922 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
2923 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
2924 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2925 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2926 "shll $16, %%edi \n\t"
2927 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2928 "mov %1, %%"REG_D" \n\t"
2929 "shrl $9, %%esi \n\t"
2930 "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
2931
// Second plane: read from src2 (%5), store at byte offset VOF from the first plane's slot.
2932 "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
2933 "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
2934 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2935 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2936 "shll $16, %%edi \n\t"
2937 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2938 "mov %1, %%"REG_D" \n\t"
2939 "shrl $9, %%esi \n\t"
8b2fce0d 2940 "movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"
2da0d70d
DB
2941
2942 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
2943 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
2944 "add $1, %%"REG_a" \n\t"
2945 "cmp %2, %%"REG_a" \n\t"
2946 " jb 1b \n\t"
2ff198c1 2947
8a322796
DB
2948/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
2949 which is needed to support GCC 4.0. */
e5091488 2950#if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
2da0d70d 2951 :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
dc77ef7f 2952#else
2da0d70d 2953 :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
dc77ef7f 2954#endif
2da0d70d
DB
2955 "r" (src2)
2956 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
2957 );
2ff198c1 2958#ifdef HAVE_MMX2
2da0d70d 2959 } //if MMX2 can't be used
2ff198c1
MN
2960#endif
2961#else
2da0d70d
DB
// Portable C fallback. xpos is the source position in 16.16 fixed point; xalpha is
// the top 7 bits of its fraction (0..127). xalpha^127 equals 127-xalpha in that
// range, so the two interpolation weights sum to 127.
2962 int i;
2963 unsigned int xpos=0;
2964 for (i=0;i<dstWidth;i++)
2965 {
2966 register unsigned int xx=xpos>>16;
2967 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2968 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
8b2fce0d 2969 dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2da0d70d
DB
2970 /* slower
2971 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
8b2fce0d 2972 dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
2da0d70d
DB
2973 */
2974 xpos+=xInc;
2975 }
bc279024 2976#endif /* defined(ARCH_X86) */
2da0d70d 2977 }
6bc0c792
MN
// Step 3: range conversion, applied in place to both planes. Skipped when source and
// destination ranges agree or when the destination format is RGB/BGR.
2978 if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
2979 int i;
2980 //FIXME all pal and rgb srcFormats could do this conversion as well
2981 //FIXME all scalers more complex than bilinear could do half of this transform
2982 if(c->srcRange){
// Multiply by 1799/2^11 and add an offset.
// NOTE(review): presumably compresses full range to limited range - confirm srcRange semantics.
2983 for (i=0; i<dstWidth; i++){
2984 dst[i ]= (dst[i ]*1799 + 4081085)>>11; //1469
2985 dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469
2986 }
2987 }else{
// Clamp the input to 30775, then multiply by 4663/2^12 and subtract an offset.
// NOTE(review): presumably the inverse (expanding) mapping - confirm.
2988 for (i=0; i<dstWidth; i++){
aa13b0fc
MN
2989 dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
2990 dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
6bc0c792
MN
2991 }
2992 }
2993 }
077ea8a7
MN
2994}
2995
3e499f53 2996static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2da0d70d
DB
2997 int srcSliceH, uint8_t* dst[], int dstStride[]){
2998
2999 /* load a few things into local vars to make the code more readable? and faster */
3000 const int srcW= c->srcW;
3001 const int dstW= c->dstW;
3002 const int dstH= c->dstH;
3003 const int chrDstW= c->chrDstW;
3004 const int chrSrcW= c->chrSrcW;
3005 const int lumXInc= c->lumXInc;
3006 const int chrXInc= c->chrXInc;
3007 const int dstFormat= c->dstFormat;
3008 const int srcFormat= c->srcFormat;
3009 const int flags= c->flags;
3010 const int canMMX2BeUsed= c->canMMX2BeUsed;
3011 int16_t *vLumFilterPos= c->vLumFilterPos;
3012 int16_t *vChrFilterPos= c->vChrFilterPos;
3013 int16_t *hLumFilterPos= c->hLumFilterPos;
3014 int16_t *hChrFilterPos= c->hChrFilterPos;
3015 int16_t *vLumFilter= c->vLumFilter;
3016 int16_t *vChrFilter= c->vChrFilter;
3017 int16_t *hLumFilter= c->hLumFilter;
3018 int16_t *hChrFilter= c->hChrFilter;
3019 int32_t *lumMmxFilter= c->lumMmxFilter;
3020 int32_t *chrMmxFilter= c->chrMmxFilter;
3021 const int vLumFilterSize= c->vLumFilterSize;
3022 const int vChrFilterSize= c->vChrFilterSize;
3023 const int hLumFilterSize= c->hLumFilterSize;
3024 const int hChrFilterSize= c->hChrFilterSize;
3025 int16_t **lumPixBuf= c->lumPixBuf;
3026 int16_t **chrPixBuf= c->chrPixBuf;
3027 const int vLumBufSize= c->vLumBufSize;
3028 const int vChrBufSize= c->vChrBufSize;
3029 uint8_t *funnyYCode= c->funnyYCode;
3030 uint8_t *funnyUVCode= c->funnyUVCode;
3031 uint8_t *formatConvBuffer= c->formatConvBuffer;
3032 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
3033 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
3034 int lastDstY;
3035 uint8_t *pal=NULL;
3036
8a322796 3037 /* vars which will change and which we need to store back in the context */
2da0d70d
DB
3038 int dstY= c->dstY;
3039 int lumBufIndex= c->lumBufIndex;
3040 int chrBufIndex= c->chrBufIndex;
3041 int lastInLumBuf= c->lastInLumBuf;
3042 int lastInChrBuf= c->lastInChrBuf;
3043
3044 if (isPacked(c->srcFormat)){
3045 pal= src[1];
3046 src[0]=
3047 src[1]=
3048 src[2]= src[0];
3049 srcStride[0]=
3050 srcStride[1]=
3051 srcStride[2]= srcStride[0];
3052 }
3053 srcStride[1]<<= c->vChrDrop;
3054 srcStride[2]<<= c->vChrDrop;
3055
3056 //printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
3057 // (int)dst[0], (int)dst[1], (int)dst[2]);
c7a810cc
MN
3058
3059#if 0 //self test FIXME move to a vfilter or something
2da0d70d
DB
3060 {
3061 static volatile int i=0;
3062 i++;
3063 if (srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH)
3064 selfTest(src, srcStride, c->srcW, c->srcH);
3065 i--;
3066 }
c7a810cc 3067#endif
37079906 3068
2da0d70d
DB
3069 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
3070 //dstStride[0],dstStride[1],dstStride[2]);
3071
3072 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
3073 {
3074 static int firstTime=1; //FIXME move this into the context perhaps
3075 if (flags & SWS_PRINT_INFO && firstTime)
3076 {
4b0c30b7 3077 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
8a322796 3078 " ->cannot do aligned memory accesses anymore\n");
2da0d70d
DB
3079 firstTime=0;
3080 }
3081 }
3082
8a322796
DB
3083 /* Note the user might start scaling the picture in the middle so this
3084 will not get executed. This is not really intended but works
3085 currently, so people might do it. */
2da0d70d
DB
3086 if (srcSliceY ==0){
3087 lumBufIndex=0;
3088 chrBufIndex=0;
3089 dstY=0;
3090 lastInLumBuf= -1;
3091 lastInChrBuf= -1;
3092 }
3093
3094 lastDstY= dstY;
3095
3096 for (;dstY < dstH; dstY++){
3097 unsigned char *dest =dst[0]+dstStride[0]*dstY;
3098 const int chrDstY= dstY>>c->chrDstVSubSample;
3099 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
3100 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
3101
3102 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
3103 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
3104 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
3105 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
3106
3107 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
3108 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
3109 //handle holes (FAST_BILINEAR & weird filters)
3110 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
3111 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
3112 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
fcc402b1
LB
3113 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
3114 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2da0d70d
DB
3115
3116 // Do we have enough lines in this slice to output the dstY line
3117 if (lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
3118 {
3119 //Do horizontal scaling
3120 while(lastInLumBuf < lastLumSrcY)
3121 {
3122 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
3123 lumBufIndex++;
3124 //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
fcc402b1
LB
3125 assert(lumBufIndex < 2*vLumBufSize);
3126 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
3127 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2da0d70d 3128 //printf("%d %d\n", lumBufIndex, vLumBufSize);
6bc0c792 3129 RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
2da0d70d
DB
3130 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
3131 funnyYCode, c->srcFormat, formatConvBuffer,
3132 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
3133 lastInLumBuf++;
3134 }
3135 while(lastInChrBuf < lastChrSrcY)
3136 {
3137 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
3138 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
3139 chrBufIndex++;
fcc402b1
LB
3140 assert(chrBufIndex < 2*vChrBufSize);
3141 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
3142 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2da0d70d
DB
3143 //FIXME replace parameters through context struct (some at least)
3144
3145 if (!(isGray(srcFormat) || isGray(dstFormat)))
6bc0c792 3146 RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
2da0d70d
DB
3147 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
3148 funnyUVCode, c->srcFormat, formatConvBuffer,
3149 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
3150 lastInChrBuf++;
3151 }
3152 //wrap buf index around to stay inside the ring buffer
e5091488
BF
3153 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
3154 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2da0d70d
DB
3155 }
3156 else // not enough lines left in this slice -> load the rest in the buffer
3157 {
3158 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
3159 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
3160 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
3161 vChrBufSize, vLumBufSize);*/
3162
3163 //Do horizontal scaling
3164 while(lastInLumBuf+1 < srcSliceY + srcSliceH)
3165 {
3166 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
3167 lumBufIndex++;
fcc402b1
LB
3168 assert(lumBufIndex < 2*vLumBufSize);
3169 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
3170 assert(lastInLumBuf + 1 - srcSliceY >= 0);
6bc0c792 3171 RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
2da0d70d
DB
3172 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
3173 funnyYCode, c->srcFormat, formatConvBuffer,
3174 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
3175 lastInLumBuf++;
3176 }
3177 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
3178 {
3179 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
3180 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
3181 chrBufIndex++;
fcc402b1
LB
3182 assert(chrBufIndex < 2*vChrBufSize);
3183 assert(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH);
3184 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2da0d70d
DB
3185
3186 if (!(isGray(srcFormat) || isGray(dstFormat)))
6bc0c792 3187 RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,