-fPIC compileable
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
ff4ec49e 3 * Copyright (c) 2000, 2001 Fabrice Bellard.
de6d9b64 4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
de6d9b64 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
de6d9b64 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
de6d9b64 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7ff037e9 18 *
59fe111e 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
de6d9b64 20 */
de6d9b64
FB
21#include "avcodec.h"
22#include "dsputil.h"
1457ab52 23#include "mpegvideo.h"
45553457 24
5596c60c
MN
25int ff_bit_exact=0;
26
0cfa9713 27UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
28UINT32 squareTbl[512];
29
2ad1516a
MN
30const UINT8 ff_zigzag_direct[64] = {
31 0, 1, 8, 16, 9, 2, 3, 10,
32 17, 24, 32, 25, 18, 11, 4, 5,
e0eac44e 33 12, 19, 26, 33, 40, 48, 41, 34,
2ad1516a 34 27, 20, 13, 6, 7, 14, 21, 28,
e0eac44e
FB
35 35, 42, 49, 56, 57, 50, 43, 36,
36 29, 22, 15, 23, 30, 37, 44, 51,
37 58, 59, 52, 45, 38, 31, 39, 46,
38 53, 60, 61, 54, 47, 55, 62, 63
39};
40
2f349de2
MN
41/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
42UINT16 __align8 inv_zigzag_direct16[64];
43
2ad1516a
MN
44const UINT8 ff_alternate_horizontal_scan[64] = {
45 0, 1, 2, 3, 8, 9, 16, 17,
e0eac44e
FB
46 10, 11, 4, 5, 6, 7, 15, 14,
47 13, 12, 19, 18, 24, 25, 32, 33,
48 26, 27, 20, 21, 22, 23, 28, 29,
49 30, 31, 34, 35, 40, 41, 48, 49,
50 42, 43, 36, 37, 38, 39, 44, 45,
51 46, 47, 50, 51, 56, 57, 58, 59,
52 52, 53, 54, 55, 60, 61, 62, 63,
53};
54
2ad1516a
MN
55const UINT8 ff_alternate_vertical_scan[64] = {
56 0, 8, 16, 24, 1, 9, 2, 10,
e0eac44e
FB
57 17, 25, 32, 40, 48, 56, 57, 49,
58 41, 33, 26, 18, 3, 11, 4, 12,
59 19, 27, 34, 42, 50, 58, 35, 43,
60 51, 59, 20, 28, 5, 13, 6, 14,
61 21, 29, 36, 44, 52, 60, 37, 45,
62 53, 61, 22, 30, 7, 15, 23, 31,
63 38, 46, 54, 62, 39, 47, 55, 63,
64};
65
2f349de2 66/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
eb4b3dd3 67const UINT32 inverse[256]={
2f349de2
MN
68 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
69 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
70 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
71 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
72 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
73 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
74 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
75 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
76 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
77 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
78 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
79 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
80 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
81 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
82 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
83 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
84 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
85 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
86 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
87 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
88 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
89 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
90 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
91 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
92 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
93 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
94 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
95 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
96 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
97 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
98 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
99 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
100};
101
eb4b3dd3 102static int pix_sum_c(UINT8 * pix, int line_size)
3aa102be
MN
103{
104 int s, i, j;
105
106 s = 0;
107 for (i = 0; i < 16; i++) {
108 for (j = 0; j < 16; j += 8) {
109 s += pix[0];
110 s += pix[1];
111 s += pix[2];
112 s += pix[3];
113 s += pix[4];
114 s += pix[5];
115 s += pix[6];
116 s += pix[7];
117 pix += 8;
118 }
119 pix += line_size - 16;
120 }
121 return s;
122}
123
eb4b3dd3 124static int pix_norm1_c(UINT8 * pix, int line_size)
3aa102be
MN
125{
126 int s, i, j;
127 UINT32 *sq = squareTbl + 256;
128
129 s = 0;
130 for (i = 0; i < 16; i++) {
131 for (j = 0; j < 16; j += 8) {
2a006cd3 132#if 0
3aa102be
MN
133 s += sq[pix[0]];
134 s += sq[pix[1]];
135 s += sq[pix[2]];
136 s += sq[pix[3]];
137 s += sq[pix[4]];
138 s += sq[pix[5]];
139 s += sq[pix[6]];
140 s += sq[pix[7]];
2a006cd3
FL
141#else
142#if LONG_MAX > 2147483647
143 register uint64_t x=*(uint64_t*)pix;
144 s += sq[x&0xff];
145 s += sq[(x>>8)&0xff];
146 s += sq[(x>>16)&0xff];
147 s += sq[(x>>24)&0xff];
148 s += sq[(x>>32)&0xff];
149 s += sq[(x>>40)&0xff];
150 s += sq[(x>>48)&0xff];
151 s += sq[(x>>56)&0xff];
152#else
153 register uint32_t x=*(uint32_t*)pix;
154 s += sq[x&0xff];
155 s += sq[(x>>8)&0xff];
156 s += sq[(x>>16)&0xff];
157 s += sq[(x>>24)&0xff];
158 x=*(uint32_t*)(pix+4);
159 s += sq[x&0xff];
160 s += sq[(x>>8)&0xff];
161 s += sq[(x>>16)&0xff];
162 s += sq[(x>>24)&0xff];
163#endif
164#endif
3aa102be
MN
165 pix += 8;
166 }
167 pix += line_size - 16;
168 }
169 return s;
170}
171
172
1457ab52
MN
173static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
174{
175 int s, i;
176 UINT32 *sq = squareTbl + 256;
177
178 s = 0;
179 for (i = 0; i < 8; i++) {
180 s += sq[pix1[0] - pix2[0]];
181 s += sq[pix1[1] - pix2[1]];
182 s += sq[pix1[2] - pix2[2]];
183 s += sq[pix1[3] - pix2[3]];
184 s += sq[pix1[4] - pix2[4]];
185 s += sq[pix1[5] - pix2[5]];
186 s += sq[pix1[6] - pix2[6]];
187 s += sq[pix1[7] - pix2[7]];
188 pix1 += line_size;
189 pix2 += line_size;
190 }
191 return s;
192}
193
6b026927 194static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
9c76bd48 195{
6b026927
FH
196 int s, i;
197 uint32_t *sq = squareTbl + 256;
9c76bd48
BF
198
199 s = 0;
200 for (i = 0; i < 16; i++) {
6b026927
FH
201 s += sq[pix1[ 0] - pix2[ 0]];
202 s += sq[pix1[ 1] - pix2[ 1]];
203 s += sq[pix1[ 2] - pix2[ 2]];
204 s += sq[pix1[ 3] - pix2[ 3]];
205 s += sq[pix1[ 4] - pix2[ 4]];
206 s += sq[pix1[ 5] - pix2[ 5]];
207 s += sq[pix1[ 6] - pix2[ 6]];
208 s += sq[pix1[ 7] - pix2[ 7]];
209 s += sq[pix1[ 8] - pix2[ 8]];
210 s += sq[pix1[ 9] - pix2[ 9]];
211 s += sq[pix1[10] - pix2[10]];
212 s += sq[pix1[11] - pix2[11]];
213 s += sq[pix1[12] - pix2[12]];
214 s += sq[pix1[13] - pix2[13]];
215 s += sq[pix1[14] - pix2[14]];
216 s += sq[pix1[15] - pix2[15]];
2a006cd3 217
6b026927
FH
218 pix1 += line_size;
219 pix2 += line_size;
9c76bd48
BF
220 }
221 return s;
222}
223
eb4b3dd3 224static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
de6d9b64 225{
de6d9b64
FB
226 int i;
227
228 /* read the pixels */
de6d9b64 229 for(i=0;i<8;i++) {
c13e1abd
FH
230 block[0] = pixels[0];
231 block[1] = pixels[1];
232 block[2] = pixels[2];
233 block[3] = pixels[3];
234 block[4] = pixels[4];
235 block[5] = pixels[5];
236 block[6] = pixels[6];
237 block[7] = pixels[7];
238 pixels += line_size;
239 block += 8;
de6d9b64
FB
240 }
241}
242
eb4b3dd3
ZK
243static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
244 const UINT8 *s2, int stride){
9dbcbd92
MN
245 int i;
246
247 /* read the pixels */
9dbcbd92 248 for(i=0;i<8;i++) {
c13e1abd
FH
249 block[0] = s1[0] - s2[0];
250 block[1] = s1[1] - s2[1];
251 block[2] = s1[2] - s2[2];
252 block[3] = s1[3] - s2[3];
253 block[4] = s1[4] - s2[4];
254 block[5] = s1[5] - s2[5];
255 block[6] = s1[6] - s2[6];
256 block[7] = s1[7] - s2[7];
9dbcbd92
MN
257 s1 += stride;
258 s2 += stride;
c13e1abd 259 block += 8;
9dbcbd92
MN
260 }
261}
262
263
eb4b3dd3
ZK
264static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
265 int line_size)
de6d9b64 266{
de6d9b64
FB
267 int i;
268 UINT8 *cm = cropTbl + MAX_NEG_CROP;
269
270 /* read the pixels */
de6d9b64 271 for(i=0;i<8;i++) {
c13e1abd
FH
272 pixels[0] = cm[block[0]];
273 pixels[1] = cm[block[1]];
274 pixels[2] = cm[block[2]];
275 pixels[3] = cm[block[3]];
276 pixels[4] = cm[block[4]];
277 pixels[5] = cm[block[5]];
278 pixels[6] = cm[block[6]];
279 pixels[7] = cm[block[7]];
280
281 pixels += line_size;
282 block += 8;
de6d9b64
FB
283 }
284}
285
eb4b3dd3 286static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
c13e1abd 287 int line_size)
de6d9b64 288{
de6d9b64
FB
289 int i;
290 UINT8 *cm = cropTbl + MAX_NEG_CROP;
291
292 /* read the pixels */
de6d9b64 293 for(i=0;i<8;i++) {
c13e1abd
FH
294 pixels[0] = cm[pixels[0] + block[0]];
295 pixels[1] = cm[pixels[1] + block[1]];
296 pixels[2] = cm[pixels[2] + block[2]];
297 pixels[3] = cm[pixels[3] + block[3]];
298 pixels[4] = cm[pixels[4] + block[4]];
299 pixels[5] = cm[pixels[5] + block[5]];
300 pixels[6] = cm[pixels[6] + block[6]];
301 pixels[7] = cm[pixels[7] + block[7]];
302 pixels += line_size;
303 block += 8;
de6d9b64
FB
304 }
305}
59fe111e
MN
306#if 0
307
/*
 * PIXOP2(OPNAME, OP), 64 bit variant (currently compiled out).
 *
 * Generates OPNAME-prefixed block routines that work on 8 pixels per row as
 * a single 64 bit word; OP(dst, value) decides how a computed word is
 * combined with the destination.  Per byte, the arithmetic used is:
 *   (a|b) - (((a^b)&0xFE)>>1)   average of a and b, rounded up
 *   (a&b) + (((a^b)&0xFE)>>1)   average of a and b, rounded down ("no_rnd")
 *   xy2: (a+b+c+d+2)>>2 ("no_rnd": +1), with the low 2 bits (l*) and the
 *        high 6 bits (h*) of each byte summed separately so that no carry
 *        crosses a byte boundary.
 * The xy2 routines emit two rows per loop iteration, so h is expected to
 * be even.
 * LD64 and CALL_2X_PIXELS are defined outside this excerpt.
 *
 * The plain copy routine is named OPNAME_pixels_c so that it matches the
 * name the CALL_2X_PIXELS list at the end refers to.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), LD64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= LD64(pixels  );\
            uint64_t b= LD64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD64(pixels  );\
            b= LD64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= LD64(pixels  );\
            uint64_t b= LD64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD64(pixels  );\
            b= LD64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
446
/*
 * op_avg(a, b): replace a with the byte-wise average of a and b, rounded up,
 * for eight bytes packed in a 64 bit word.  Masking with 0xFE.. before the
 * shift keeps bits from leaking between neighbouring bytes.  The expansion
 * is fully parenthesised so it stays a single expression wherever it is used.
 */
#define op_avg(a, b) ((a) = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ))
448#else // 64 bit variant
449
/*
 * PIXOP2(OPNAME, OP), 32 bit variant.
 *
 * Generates OPNAME-prefixed block copy / interpolation routines that handle
 * an 8 pixel wide row as two 32 bit words; OP(dst, value) decides how each
 * computed word is combined with the destination.  Per byte, the arithmetic
 * used is:
 *   (a|b) - (((a^b)&0xFE)>>1)   average of a and b, rounded up
 *   (a&b) + (((a^b)&0xFE)>>1)   average of a and b, rounded down ("no_rnd")
 *   l4 / xy2: (a+b+c+d+2)>>2 ("no_rnd": +1), with the low 2 bits (l*) and
 *        the high 6 bits (h*) of each byte summed separately so that no
 *        carry crosses a byte boundary.
 *
 * Generated helpers:
 *   _pixels8_c                     plain 8xh OP
 *   _[no_rnd_]pixels{8,16}_l2      OP of the 2-way average of src1 and src2
 *   _[no_rnd_]pixels{8,16}_l4      OP of the 4-way average of src1..src4
 *   _[no_rnd_]pixels8_{x2,y2}_c    l2 of a block and its right / lower neighbour
 *   _[no_rnd_]pixels8_xy2_c        4-way average of the 2x2 neighbourhood;
 *                                  two rows are produced per loop iteration,
 *                                  so h is expected to be even
 *   16 pixel wide versions         built with CALL_2X_PIXELS
 * The source pointers of the l4 helpers are all const: they are only read.
 * LD32 and CALL_2X_PIXELS are defined outside this excerpt.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i, k;\
    for(i=0; i<h; i++){\
        for(k=0; k<8; k+=4){\
            const uint32_t a= LD32(&src1[i*src_stride1+k]);\
            const uint32_t b= LD32(&src2[i*src_stride2+k]);\
            OP(*((uint32_t*)&dst[i*dst_stride+k]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
        }\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i, k;\
    for(i=0; i<h; i++){\
        for(k=0; k<8; k+=4){\
            const uint32_t a= LD32(&src1[i*src_stride1+k]);\
            const uint32_t b= LD32(&src2[i*src_stride2+k]);\
            OP(*((uint32_t*)&dst[i*dst_stride+k]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
        }\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i, k;\
    for(i=0; i<h; i++){\
        for(k=0; k<8; k+=4){\
            const uint32_t a= LD32(&src1[i*src_stride1+k]);\
            const uint32_t b= LD32(&src2[i*src_stride2+k]);\
            const uint32_t c= LD32(&src3[i*src_stride3+k]);\
            const uint32_t d= LD32(&src4[i*src_stride4+k]);\
            const uint32_t l0=  (a&0x03030303UL)\
                              + (b&0x03030303UL)\
                              + 0x02020202UL;\
            const uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                             + ((b&0xFCFCFCFCUL)>>2);\
            const uint32_t l1=  (c&0x03030303UL)\
                              + (d&0x03030303UL);\
            const uint32_t h1= ((c&0xFCFCFCFCUL)>>2)\
                             + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride+k]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        }\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i, k;\
    for(i=0; i<h; i++){\
        for(k=0; k<8; k+=4){\
            const uint32_t a= LD32(&src1[i*src_stride1+k]);\
            const uint32_t b= LD32(&src2[i*src_stride2+k]);\
            const uint32_t c= LD32(&src3[i*src_stride3+k]);\
            const uint32_t d= LD32(&src4[i*src_stride4+k]);\
            const uint32_t l0=  (a&0x03030303UL)\
                              + (b&0x03030303UL)\
                              + 0x01010101UL;\
            const uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                             + ((b&0xFCFCFCFCUL)>>2);\
            const uint32_t l1=  (c&0x03030303UL)\
                              + (d&0x03030303UL);\
            const uint32_t h1= ((c&0xFCFCFCFCUL)>>2)\
                             + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride+k]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        }\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels8_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c    , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)
59fe111e
MN
/*
 * op_avg(a, b): replace a with the byte-wise average of a and b, rounded up,
 * for four bytes packed in a 32 bit word.  Masking with 0xFEFEFEFE before the
 * shift keeps bits from leaking between neighbouring bytes.  The expansion
 * is fully parenthesised so it stays a single expression wherever it is used.
 */
#define op_avg(a, b) ((a) = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ))
693#endif
59fe111e
MN
/*
 * op_put(a, b): plain store of b into a.  Arguments and expansion are
 * parenthesised so the macro behaves as one expression for any operands.
 */
#define op_put(a, b) ((a) = (b))
695
696PIXOP2(avg, op_avg)
697PIXOP2(put, op_put)
698#undef op_avg
699#undef op_put
700
de6d9b64
FB
/*
 * Rounded averages of two and of four values.  Every argument is
 * parenthesised so that operands containing lower-precedence operators
 * (|, &, ?:, ...) are averaged as written.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
703
073b013d 704
b3184779 705static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder)
44eb4951
MN
706{
707 const int A=(16-x16)*(16-y16);
708 const int B=( x16)*(16-y16);
709 const int C=(16-x16)*( y16);
710 const int D=( x16)*( y16);
711 int i;
44eb4951
MN
712
713 for(i=0; i<h; i++)
714 {
b3184779
MN
715 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
716 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
717 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
718 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
719 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
720 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
721 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
722 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
723 dst+= stride;
724 src+= stride;
44eb4951
MN
725 }
726}
727
073b013d
MN
728static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
729 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
730{
731 int y, vx, vy;
732 const int s= 1<<shift;
733
734 width--;
735 height--;
736
737 for(y=0; y<h; y++){
738 int x;
739
740 vx= ox;
741 vy= oy;
742 for(x=0; x<8; x++){ //XXX FIXME optimize
743 int src_x, src_y, frac_x, frac_y, index;
744
745 src_x= vx>>16;
746 src_y= vy>>16;
747 frac_x= src_x&(s-1);
748 frac_y= src_y&(s-1);
749 src_x>>=shift;
750 src_y>>=shift;
751
752 if((unsigned)src_x < width){
753 if((unsigned)src_y < height){
754 index= src_x + src_y*stride;
755 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
756 + src[index +1]* frac_x )*(s-frac_y)
757 + ( src[index+stride ]*(s-frac_x)
758 + src[index+stride+1]* frac_x )* frac_y
759 + r)>>(shift*2);
760 }else{
761 index= src_x + clip(src_y, 0, height)*stride;
762 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
763 + src[index +1]* frac_x )*s
764 + r)>>(shift*2);
765 }
766 }else{
767 if((unsigned)src_y < height){
768 index= clip(src_x, 0, width) + src_y*stride;
769 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
770 + src[index+stride ]* frac_y )*s
771 + r)>>(shift*2);
772 }else{
773 index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
774 dst[y*stride + x]= src[index ];
775 }
776 }
777
778 vx+= dxx;
779 vy+= dyx;
780 }
781 ox += dxy;
782 oy += dyy;
783 }
784}
785
b3184779 786static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
44eb4951 787{
44eb4951
MN
788 int i;
789 for(i=0; i<h; i++)
790 {
b3184779
MN
791 ST32(dst , LD32(src ));
792 ST32(dst+4 , LD32(src+4 ));
793 ST32(dst+8 , LD32(src+8 ));
794 ST32(dst+12, LD32(src+12));
795 dst[16]= src[16];
44eb4951
MN
796 dst+=dstStride;
797 src+=srcStride;
798 }
799}
800
b3184779 801static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
44eb4951
MN
802{
803 int i;
b3184779 804 for(i=0; i<h; i++)
44eb4951 805 {
b3184779
MN
806 ST32(dst , LD32(src ));
807 ST32(dst+4 , LD32(src+4 ));
808 dst[8]= src[8];
44eb4951
MN
809 dst+=dstStride;
810 src+=srcStride;
811 }
812}
813
826f429a 814
b3184779
MN
815#define QPEL_MC(r, OPNAME, RND, OP) \
816static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
817 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
818 int i;\
819 for(i=0; i<h; i++)\
820 {\
821 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
822 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
823 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
824 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
825 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
826 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
827 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
828 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
829 dst+=dstStride;\
830 src+=srcStride;\
831 }\
44eb4951
MN
832}\
833\
db794953
MN
834static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\
835 const int w=8;\
b3184779
MN
836 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
837 int i;\
838 for(i=0; i<w; i++)\
839 {\
840 const int src0= src[0*srcStride];\
841 const int src1= src[1*srcStride];\
842 const int src2= src[2*srcStride];\
843 const int src3= src[3*srcStride];\
844 const int src4= src[4*srcStride];\
845 const int src5= src[5*srcStride];\
846 const int src6= src[6*srcStride];\
847 const int src7= src[7*srcStride];\
848 const int src8= src[8*srcStride];\
849 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
850 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
851 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
852 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
853 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
854 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
855 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
856 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
857 dst++;\
858 src++;\
859 }\
860}\
861\
862static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
863 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
864 int i;\
826f429a 865 \
b3184779
MN
866 for(i=0; i<h; i++)\
867 {\
868 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
869 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
870 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
871 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
872 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
873 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
874 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
875 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
876 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
877 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
878 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
879 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
880 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
881 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
882 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
883 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
884 dst+=dstStride;\
885 src+=srcStride;\
886 }\
887}\
888\
826f429a 889static void OPNAME ## mpeg4_qpel16_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\
b3184779
MN
890 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
891 int i;\
826f429a 892 const int w=16;\
b3184779
MN
893 for(i=0; i<w; i++)\
894 {\
895 const int src0= src[0*srcStride];\
896 const int src1= src[1*srcStride];\
897 const int src2= src[2*srcStride];\
898 const int src3= src[3*srcStride];\
899 const int src4= src[4*srcStride];\
900 const int src5= src[5*srcStride];\
901 const int src6= src[6*srcStride];\
902 const int src7= src[7*srcStride];\
903 const int src8= src[8*srcStride];\
904 const int src9= src[9*srcStride];\
905 const int src10= src[10*srcStride];\
906 const int src11= src[11*srcStride];\
907 const int src12= src[12*srcStride];\
908 const int src13= src[13*srcStride];\
909 const int src14= src[14*srcStride];\
910 const int src15= src[15*srcStride];\
911 const int src16= src[16*srcStride];\
912 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
913 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
914 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
915 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
916 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
917 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
918 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
919 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
920 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
921 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
922 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
923 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
924 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
925 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
926 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
927 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
928 dst++;\
929 src++;\
930 }\
931}\
932\
933static void OPNAME ## qpel8_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
45553457 934 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
935}\
936\
937static void OPNAME ## qpel8_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951 938 UINT8 half[64];\
b3184779
MN
939 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
940 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
941}\
942\
b3184779
MN
943static void OPNAME ## qpel8_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
944 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
945}\
946\
b3184779 947static void OPNAME ## qpel8_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951 948 UINT8 half[64];\
b3184779
MN
949 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
950 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
951}\
952\
b3184779
MN
953static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
954 UINT8 full[16*9];\
44eb4951 955 UINT8 half[64];\
b3184779 956 copy_block9(full, src, 16, stride, 9);\
db794953 957 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 958 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
959}\
960\
b3184779
MN
961static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
962 UINT8 full[16*9];\
963 copy_block9(full, src, 16, stride, 9);\
db794953 964 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
965}\
966\
b3184779
MN
967static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
968 UINT8 full[16*9];\
44eb4951 969 UINT8 half[64];\
b3184779 970 copy_block9(full, src, 16, stride, 9);\
db794953 971 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 972 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 973}\
db794953 974void ff_ ## OPNAME ## qpel8_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779 975 UINT8 full[16*9];\
44eb4951 976 UINT8 halfH[72];\
7ff037e9 977 UINT8 halfV[64];\
44eb4951 978 UINT8 halfHV[64];\
b3184779
MN
979 copy_block9(full, src, 16, stride, 9);\
980 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
981 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
982 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 983 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 984}\
db794953
MN
985static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
986 UINT8 full[16*9];\
987 UINT8 halfH[72];\
988 UINT8 halfHV[64];\
989 copy_block9(full, src, 16, stride, 9);\
990 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
991 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
992 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
993 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
994}\
995void ff_ ## OPNAME ## qpel8_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779 996 UINT8 full[16*9];\
44eb4951 997 UINT8 halfH[72];\
7ff037e9 998 UINT8 halfV[64];\
44eb4951 999 UINT8 halfHV[64];\
b3184779
MN
1000 copy_block9(full, src, 16, stride, 9);\
1001 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1002 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1003 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1004 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1005}\
db794953
MN
1006static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
1007 UINT8 full[16*9];\
1008 UINT8 halfH[72];\
1009 UINT8 halfHV[64];\
1010 copy_block9(full, src, 16, stride, 9);\
1011 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1012 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1013 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1014 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1015}\
1016void ff_ ## OPNAME ## qpel8_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779 1017 UINT8 full[16*9];\
44eb4951 1018 UINT8 halfH[72];\
7ff037e9 1019 UINT8 halfV[64];\
44eb4951 1020 UINT8 halfHV[64];\
b3184779
MN
1021 copy_block9(full, src, 16, stride, 9);\
1022 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1023 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1024 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1025 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1026}\
db794953
MN
1027static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
1028 UINT8 full[16*9];\
1029 UINT8 halfH[72];\
1030 UINT8 halfHV[64];\
1031 copy_block9(full, src, 16, stride, 9);\
1032 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1033 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1034 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1035 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1036}\
1037void ff_ ## OPNAME ## qpel8_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779 1038 UINT8 full[16*9];\
44eb4951 1039 UINT8 halfH[72];\
7ff037e9 1040 UINT8 halfV[64];\
44eb4951 1041 UINT8 halfHV[64];\
b3184779
MN
1042 copy_block9(full, src, 16, stride, 9);\
1043 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1044 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1045 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1046 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1047}\
db794953
MN
1048static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
1049 UINT8 full[16*9];\
1050 UINT8 halfH[72];\
1051 UINT8 halfHV[64];\
1052 copy_block9(full, src, 16, stride, 9);\
1053 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1054 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1055 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1056 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1057}\
b3184779 1058static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951
MN
1059 UINT8 halfH[72];\
1060 UINT8 halfHV[64];\
b3184779 1061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1062 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1063 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1064}\
b3184779 1065static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951
MN
1066 UINT8 halfH[72];\
1067 UINT8 halfHV[64];\
b3184779 1068 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1069 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1070 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1071}\
db794953 1072void ff_ ## OPNAME ## qpel8_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779 1073 UINT8 full[16*9];\
44eb4951 1074 UINT8 halfH[72];\
7ff037e9 1075 UINT8 halfV[64];\
44eb4951 1076 UINT8 halfHV[64];\
b3184779
MN
1077 copy_block9(full, src, 16, stride, 9);\
1078 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1079 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1080 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1081 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1082}\
db794953
MN
1083static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1084 UINT8 full[16*9];\
1085 UINT8 halfH[72];\
1086 copy_block9(full, src, 16, stride, 9);\
1087 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1088 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1089 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1090}\
1091void ff_ ## OPNAME ## qpel8_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779 1092 UINT8 full[16*9];\
44eb4951 1093 UINT8 halfH[72];\
7ff037e9 1094 UINT8 halfV[64];\
44eb4951 1095 UINT8 halfHV[64];\
b3184779
MN
1096 copy_block9(full, src, 16, stride, 9);\
1097 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1098 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1099 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1100 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1101}\
db794953
MN
1102static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1103 UINT8 full[16*9];\
1104 UINT8 halfH[72];\
1105 copy_block9(full, src, 16, stride, 9);\
1106 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1107 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1108 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1109}\
b3184779 1110static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951 1111 UINT8 halfH[72];\
b3184779 1112 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1113 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779
MN
1114}\
1115static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
45553457 1116 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1117}\
1118\
1119static void OPNAME ## qpel16_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
1120 UINT8 half[256];\
1121 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1122 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1123}\
1124\
1125static void OPNAME ## qpel16_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
1126 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1127}\
b3184779
MN
1128\
1129static void OPNAME ## qpel16_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
1130 UINT8 half[256];\
1131 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1132 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1133}\
1134\
1135static void OPNAME ## qpel16_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
1136 UINT8 full[24*17];\
1137 UINT8 half[256];\
1138 copy_block17(full, src, 24, stride, 17);\
826f429a 1139 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1140 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1141}\
1142\
1143static void OPNAME ## qpel16_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
1144 UINT8 full[24*17];\
1145 copy_block17(full, src, 24, stride, 17);\
826f429a 1146 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1147}\
1148\
1149static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
1150 UINT8 full[24*17];\
1151 UINT8 half[256];\
1152 copy_block17(full, src, 24, stride, 17);\
826f429a 1153 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1154 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1155}\
db794953 1156void ff_ ## OPNAME ## qpel16_mc11_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779
MN
1157 UINT8 full[24*17];\
1158 UINT8 halfH[272];\
1159 UINT8 halfV[256];\
1160 UINT8 halfHV[256];\
1161 copy_block17(full, src, 24, stride, 17);\
1162 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1163 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1164 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1165 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1166}\
db794953
MN
1167static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
1168 UINT8 full[24*17];\
1169 UINT8 halfH[272];\
1170 UINT8 halfHV[256];\
1171 copy_block17(full, src, 24, stride, 17);\
1172 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1173 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1174 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1175 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1176}\
1177void ff_ ## OPNAME ## qpel16_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779
MN
1178 UINT8 full[24*17];\
1179 UINT8 halfH[272];\
1180 UINT8 halfV[256];\
1181 UINT8 halfHV[256];\
1182 copy_block17(full, src, 24, stride, 17);\
1183 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1184 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1185 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1186 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1187}\
db794953
MN
1188static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
1189 UINT8 full[24*17];\
1190 UINT8 halfH[272];\
1191 UINT8 halfHV[256];\
1192 copy_block17(full, src, 24, stride, 17);\
1193 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1194 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1195 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1196 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1197}\
1198void ff_ ## OPNAME ## qpel16_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779
MN
1199 UINT8 full[24*17];\
1200 UINT8 halfH[272];\
1201 UINT8 halfV[256];\
1202 UINT8 halfHV[256];\
1203 copy_block17(full, src, 24, stride, 17);\
1204 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1205 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1206 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1207 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1208}\
db794953
MN
1209static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
1210 UINT8 full[24*17];\
1211 UINT8 halfH[272];\
1212 UINT8 halfHV[256];\
1213 copy_block17(full, src, 24, stride, 17);\
1214 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1215 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1216 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1217 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1218}\
1219void ff_ ## OPNAME ## qpel16_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779
MN
1220 UINT8 full[24*17];\
1221 UINT8 halfH[272];\
1222 UINT8 halfV[256];\
1223 UINT8 halfHV[256];\
1224 copy_block17(full, src, 24, stride, 17);\
1225 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1226 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1227 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1228 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1229}\
db794953
MN
1230static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
1231 UINT8 full[24*17];\
1232 UINT8 halfH[272];\
1233 UINT8 halfHV[256];\
1234 copy_block17(full, src, 24, stride, 17);\
1235 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1236 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1237 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1238 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1239}\
b3184779
MN
1240static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
1241 UINT8 halfH[272];\
1242 UINT8 halfHV[256];\
1243 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1244 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1245 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1246}\
1247static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
1248 UINT8 halfH[272];\
1249 UINT8 halfHV[256];\
1250 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1251 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1252 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1253}\
db794953 1254void ff_ ## OPNAME ## qpel16_mc12_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779
MN
1255 UINT8 full[24*17];\
1256 UINT8 halfH[272];\
1257 UINT8 halfV[256];\
1258 UINT8 halfHV[256];\
1259 copy_block17(full, src, 24, stride, 17);\
1260 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1261 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1262 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1263 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1264}\
db794953
MN
1265static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1266 UINT8 full[24*17];\
1267 UINT8 halfH[272];\
1268 copy_block17(full, src, 24, stride, 17);\
1269 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1270 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1271 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1272}\
1273void ff_ ## OPNAME ## qpel16_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\
b3184779
MN
1274 UINT8 full[24*17];\
1275 UINT8 halfH[272];\
1276 UINT8 halfV[256];\
1277 UINT8 halfHV[256];\
1278 copy_block17(full, src, 24, stride, 17);\
1279 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1280 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1281 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1282 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1283}\
db794953
MN
1284static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1285 UINT8 full[24*17];\
1286 UINT8 halfH[272];\
1287 copy_block17(full, src, 24, stride, 17);\
1288 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1289 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1290 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1291}\
b3184779
MN
1292static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
1293 UINT8 halfH[272];\
1294 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1295 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1296}
44eb4951 1297
b3184779
MN
1298#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1299#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1300#define op_put(a, b) a = cm[((b) + 16)>>5]
1301#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1302
1303QPEL_MC(0, put_ , _ , op_put)
1304QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1305QPEL_MC(0, avg_ , _ , op_avg)
1306//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1307#undef op_avg
1308#undef op_avg_no_rnd
1309#undef op_put
1310#undef op_put_no_rnd
44eb4951 1311
1457ab52
MN
1312static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1313 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1314 int i;
1315
1316 for(i=0; i<h; i++){
1317 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1318 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1319 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1320 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1321 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1322 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1323 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1324 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1325 dst+=dstStride;
1326 src+=srcStride;
1327 }
1328}
1329
1330static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1331 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1332 int i;
1333
1334 for(i=0; i<w; i++){
1335 const int src_1= src[ -srcStride];
1336 const int src0 = src[0 ];
1337 const int src1 = src[ srcStride];
1338 const int src2 = src[2*srcStride];
1339 const int src3 = src[3*srcStride];
1340 const int src4 = src[4*srcStride];
1341 const int src5 = src[5*srcStride];
1342 const int src6 = src[6*srcStride];
1343 const int src7 = src[7*srcStride];
1344 const int src8 = src[8*srcStride];
1345 const int src9 = src[9*srcStride];
1346 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1347 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1348 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1349 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1350 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1351 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1352 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1353 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1354 src++;
1355 dst++;
1356 }
1357}
1358
/**
 * mc00 case: no filtering, the block is handed straight to put_pixels8_c
 * with 8 as its last argument (presumably the row count).
 */
static void put_mspel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
1362
/**
 * mc10 case: horizontally filter 8 rows into a temporary 8x8 buffer, then
 * pass src and that buffer to put_pixels8_l2 (presumably a two-source
 * average; defined elsewhere) to produce dst.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
1368
/**
 * mc20 case: the horizontally filtered 8 rows are written directly to dst,
 * using stride for both source and destination.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1372
/**
 * mc30 case: like mc10, but the unfiltered input given to put_pixels8_l2
 * starts one pixel to the right (src+1).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src + 1, filtered, stride, stride, 8, 8);
}
1378
/**
 * mc02 case: the vertically filtered 8 columns are written directly to dst,
 * using stride for both source and destination.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1382
/**
 * mc12 case: combines a vertically filtered block with a block that was
 * filtered horizontally and then vertically.
 *
 * The horizontal pass starts one row above src and covers 11 rows so that
 * the following vertical pass (which starts at the second stored row) has
 * the row above and the two rows below that it reads.  Both 8x8 results
 * are then passed to put_pixels8_l2 (presumably a two-source average;
 * defined elsewhere).
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[88];      /* 11 rows of 8 pixels */
    uint8_t vert[64];
    uint8_t horiz_vert[64];

    wmv2_mspel8_h_lowpass(horiz, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vert, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(horiz_vert, horiz + 8, 8, 8, 8);
    put_pixels8_l2(dst, vert, horiz_vert, stride, 8, 8, 8);
}
/**
 * mc32 case: same as mc12, except that the purely vertical pass reads its
 * input starting one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[88];      /* 11 rows of 8 pixels */
    uint8_t vert[64];
    uint8_t horiz_vert[64];

    wmv2_mspel8_h_lowpass(horiz, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vert, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(horiz_vert, horiz + 8, 8, 8, 8);
    put_pixels8_l2(dst, vert, horiz_vert, stride, 8, 8, 8);
}
/**
 * mc22 case: horizontal pass over 11 rows (starting one row above src)
 * into a temporary buffer, followed by a vertical pass from the second
 * stored row straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[88];      /* 11 rows of 8 pixels */

    wmv2_mspel8_h_lowpass(horiz, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, horiz + 8, stride, 8, 8);
}
1406
1407
1408static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1409{
1410 int s, i;
1411
1412 s = 0;
ba6802de 1413 for(i=0;i<16;i++) {
de6d9b64
FB
1414 s += abs(pix1[0] - pix2[0]);
1415 s += abs(pix1[1] - pix2[1]);
1416 s += abs(pix1[2] - pix2[2]);
1417 s += abs(pix1[3] - pix2[3]);
1418 s += abs(pix1[4] - pix2[4]);
1419 s += abs(pix1[5] - pix2[5]);
1420 s += abs(pix1[6] - pix2[6]);
1421 s += abs(pix1[7] - pix2[7]);
1422 s += abs(pix1[8] - pix2[8]);
1423 s += abs(pix1[9] - pix2[9]);
1424 s += abs(pix1[10] - pix2[10]);
1425 s += abs(pix1[11] - pix2[11]);
1426 s += abs(pix1[12] - pix2[12]);
1427 s += abs(pix1[13] - pix2[13]);
1428 s += abs(pix1[14] - pix2[14]);
1429 s += abs(pix1[15] - pix2[15]);
1430 pix1 += line_size;
1431 pix2 += line_size;
1432 }
1433 return s;
1434}
1435
eb4b3dd3 1436static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1437{
1438 int s, i;
1439
1440 s = 0;
ba6802de 1441 for(i=0;i<16;i++) {
de6d9b64
FB
1442 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1443 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1444 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1445 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1446 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1447 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1448 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1449 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1450 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1451 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1452 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1453 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1454 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1455 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1456 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1457 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1458 pix1 += line_size;
1459 pix2 += line_size;
1460 }
1461 return s;
1462}
1463
eb4b3dd3 1464static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1465{
1466 int s, i;
1467 UINT8 *pix3 = pix2 + line_size;
1468
1469 s = 0;
ba6802de 1470 for(i=0;i<16;i++) {
de6d9b64
FB
1471 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1472 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1473 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1474 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1475 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1476 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1477 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1478 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1479 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1480 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1481 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1482 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1483 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1484 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1485 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1486 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1487 pix1 += line_size;
1488 pix2 += line_size;
1489 pix3 += line_size;
1490 }
1491 return s;
1492}
1493
eb4b3dd3 1494static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1495{
1496 int s, i;
1497 UINT8 *pix3 = pix2 + line_size;
1498
1499 s = 0;
ba6802de 1500 for(i=0;i<16;i++) {
de6d9b64
FB
1501 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1502 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1503 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1504 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1505 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1506 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1507 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1508 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1509 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1510 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1511 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1512 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1513 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1514 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1515 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1516 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1517 pix1 += line_size;
1518 pix2 += line_size;
1519 pix3 += line_size;
1520 }
1521 return s;
1522}
1523
1457ab52 1524static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1525{
1526 int s, i;
1527
1528 s = 0;
1529 for(i=0;i<8;i++) {
1530 s += abs(pix1[0] - pix2[0]);
1531 s += abs(pix1[1] - pix2[1]);
1532 s += abs(pix1[2] - pix2[2]);
1533 s += abs(pix1[3] - pix2[3]);
1534 s += abs(pix1[4] - pix2[4]);
1535 s += abs(pix1[5] - pix2[5]);
1536 s += abs(pix1[6] - pix2[6]);
1537 s += abs(pix1[7] - pix2[7]);
1538 pix1 += line_size;
1539 pix2 += line_size;
1540 }
1541 return s;
1542}
1543
eb4b3dd3 1544static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1545{
1546 int s, i;
1547
1548 s = 0;
1549 for(i=0;i<8;i++) {
1550 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1551 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1552 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1553 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1554 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1555 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1556 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1557 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1558 pix1 += line_size;
1559 pix2 += line_size;
1560 }
1561 return s;
1562}
1563
eb4b3dd3 1564static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1565{
1566 int s, i;
1567 UINT8 *pix3 = pix2 + line_size;
1568
1569 s = 0;
1570 for(i=0;i<8;i++) {
1571 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1572 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1573 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1574 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1575 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1576 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1577 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1578 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1579 pix1 += line_size;
1580 pix2 += line_size;
1581 pix3 += line_size;
1582 }
1583 return s;
1584}
1585
eb4b3dd3 1586static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1587{
1588 int s, i;
1589 UINT8 *pix3 = pix2 + line_size;
1590
1591 s = 0;
1592 for(i=0;i<8;i++) {
1593 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1594 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1595 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1596 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1597 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1598 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1599 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1600 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1601 pix1 += line_size;
1602 pix2 += line_size;
1603 pix3 += line_size;
1604 }
1605 return s;
1606}
1607
1457ab52
MN
/**
 * Adapter exposing pix_abs16x16_c() through the (context, a, b, stride)
 * signature; the context argument s is not used.
 */
static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    (void)s;
    return pix_abs16x16_c(a, b, stride);
}
1611
/**
 * Adapter exposing pix_abs8x8_c() through the (context, a, b, stride)
 * signature; the context argument s is not used.
 */
static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    (void)s;
    return pix_abs8x8_c(a, b, stride);
}
1615
477ab036 1616void ff_block_permute(DCTELEM *block, UINT8 *permutation, const UINT8 *scantable, int last)
d962f6fd 1617{
7801d21d 1618 int i;
477ab036 1619 DCTELEM temp[64];
7801d21d
MN
1620
1621 if(last<=0) return;
9a7b310d 1622 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 1623
7801d21d
MN
1624 for(i=0; i<=last; i++){
1625 const int j= scantable[i];
1626 temp[j]= block[j];
1627 block[j]=0;
1628 }
1629
1630 for(i=0; i<=last; i++){
1631 const int j= scantable[i];
1632 const int perm_j= permutation[j];
1633 block[perm_j]= temp[j];
1634 }
d962f6fd 1635}
e0eac44e 1636
eb4b3dd3 1637static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
1638{
1639 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1640}
1641
11f18faf
MN
/**
 * Add src to dst byte by byte, in place: dst[n] += src[n] for 0 <= n < w.
 * The result of each addition is truncated to 8 bits (wraps modulo 256).
 *
 * @param dst destination, also the first operand
 * @param src second operand
 * @param w   number of bytes to process; nothing happens when w <= 0
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] += src[n];
}
1657
/**
 * Byte-wise difference: dst[n] = src1[n] - src2[n] for 0 <= n < w.
 * Each result is truncated to 8 bits (wraps modulo 256).
 *
 * @param dst  destination
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes to process; nothing happens when w <= 0
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] = src1[n] - src2[n];
}
1673
1457ab52
MN
/*
 * Butterfly with separate outputs: o1 = i1 + i2, o2 = i1 - i2.
 * Both inputs are read once, before either output is written, and the
 * expansion is a single statement, so it is safe under an unbraced if/else.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    const int bf_in1= (i1);\
    const int bf_in2= (i2);\
    (o1)= bf_in1+bf_in2;\
    (o2)= bf_in1-bf_in2;\
} while (0)

/*
 * In-place butterfly: x becomes x + y and y becomes x - y (old values).
 * The temporaries use a bf_ prefix so that caller variables called a or b
 * are not captured by the expansion.
 */
#define BUTTERFLY1(x,y) \
do {\
    const int bf_a= (x);\
    const int bf_b= (y);\
    (x)= bf_a+bf_b;\
    (y)= bf_a-bf_b;\
} while (0)

/*
 * Final butterfly stage folded into the accumulation: |x + y| + |x - y|.
 * x and y are each evaluated twice, so they must be free of side effects.
 */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
1688
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * the difference src - dst.
 *
 * @param s      context pointer, not used by this function
 * @param dst    top-left pixel of the first 8x8 block
 * @param src    top-left pixel of the second 8x8 block
 * @param stride distance in bytes between two consecutive rows (both blocks)
 * @return the sum of the absolute transformed coefficients
 */
static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
    int temp[64];
    int sum = 0;
    int i;

    /* Horizontal pass: three butterfly stages on every row of differences. */
    for (i = 0; i < 8; i++) {
        int *row = temp + 8*i;
        const uint8_t *p_src = src + stride*i;
        const uint8_t *p_dst = dst + stride*i;

        BUTTERFLY2(row[0], row[1], p_src[0]-p_dst[0], p_src[1]-p_dst[1]);
        BUTTERFLY2(row[2], row[3], p_src[2]-p_dst[2], p_src[3]-p_dst[3]);
        BUTTERFLY2(row[4], row[5], p_src[4]-p_dst[4], p_src[5]-p_dst[5]);
        BUTTERFLY2(row[6], row[7], p_src[6]-p_dst[6], p_src[7]-p_dst[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* Vertical pass: two stages in place, the third one is computed by
       BUTTERFLYA while accumulating the absolute values. */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}
1738
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * a block after subtracting a constant from every pixel.
 *
 * @param src    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two consecutive rows
 * @param mean   value subtracted from every pixel before the transform
 * @return the sum of the absolute transformed coefficients
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int temp[64];
    int sum = 0;
    int i;

    //FIXME OOOPS ignore 0 term instead of mean mess
    /* Horizontal pass: three butterfly stages on every row. */
    for (i = 0; i < 8; i++) {
        int *row = temp + 8*i;
        const uint8_t *p_src = src + stride*i;

        BUTTERFLY2(row[0], row[1], p_src[0]-mean, p_src[1]-mean);
        BUTTERFLY2(row[2], row[3], p_src[2]-mean, p_src[3]-mean);
        BUTTERFLY2(row[4], row[5], p_src[4]-mean, p_src[5]-mean);
        BUTTERFLY2(row[6], row[7], p_src[6]-mean, p_src[7]-mean);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* Vertical pass: two stages in place, the third one is computed by
       BUTTERFLYA while accumulating the absolute values. */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}
1782
1783static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1784 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
1785 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
1786 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1457ab52
MN
1787 int sum=0, i;
1788
1789 s->dsp.diff_pixels(temp, src1, src2, stride);
1790 s->fdct(temp);
1791
1792 for(i=0; i<64; i++)
1793 sum+= ABS(temp[i]);
1794
1795 return sum;
1796}
1797
0e15384d 1798void simple_idct(DCTELEM *block); //FIXME
1457ab52
MN
1799
1800static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1801 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
1802 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
1803 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1804 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
1457ab52
MN
1805 int sum=0, i;
1806
1807 s->mb_intra=0;
1808
1809 s->dsp.diff_pixels(temp, src1, src2, stride);
1810
1811 memcpy(bak, temp, 64*sizeof(DCTELEM));
1812
67725183 1813 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1457ab52
MN
1814 s->dct_unquantize(s, temp, 0, s->qscale);
1815 simple_idct(temp); //FIXME
1816
1817 for(i=0; i<64; i++)
1818 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
1819
1820 return sum;
1821}
1822
3a87ac94
MN
1823static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1824 MpegEncContext * const s= (MpegEncContext *)c;
1825 const UINT8 *scantable= s->intra_scantable.permutated;
76fbb024
MN
1826 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
1827 uint64_t __align8 aligned_bak[stride];
1828 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1829 uint8_t * const bak= (uint8_t*)aligned_bak;
3a87ac94
MN
1830 int i, last, run, bits, level, distoration, start_i;
1831 const int esc_length= s->ac_esc_length;
1832 uint8_t * length;
1833 uint8_t * last_length;
67725183
MN
1834
1835 for(i=0; i<8; i++){
1836 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
1837 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
1838 }
3a87ac94 1839
67725183
MN
1840 s->dsp.diff_pixels(temp, src1, src2, stride);
1841
1842 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1843
1844 bits=0;
3a87ac94
MN
1845
1846 if (s->mb_intra) {
67725183 1847 start_i = 1;
3a87ac94
MN
1848 length = s->intra_ac_vlc_length;
1849 last_length= s->intra_ac_vlc_last_length;
67725183 1850 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
1851 } else {
1852 start_i = 0;
1853 length = s->inter_ac_vlc_length;
1854 last_length= s->inter_ac_vlc_last_length;
1855 }
3a87ac94 1856
67725183 1857 if(last>=start_i){
3a87ac94
MN
1858 run=0;
1859 for(i=start_i; i<last; i++){
1860 int j= scantable[i];
1861 level= temp[j];
1862
1863 if(level){
1864 level+=64;
1865 if((level&(~127)) == 0){
1866 bits+= length[UNI_AC_ENC_INDEX(run, level)];
1867 }else
1868 bits+= esc_length;
1869 run=0;
1870 }else
1871 run++;
1872 }
1873 i= scantable[last];
1d0eab1d 1874
3a87ac94 1875 level= temp[i] + 64;
1d0eab1d
MN
1876
1877 assert(level - 64);
1878
3a87ac94
MN
1879 if((level&(~127)) == 0){
1880 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
1881 }else
1882 bits+= esc_length;
1883
67725183
MN
1884 }
1885
1886 if(last>=0){
3a87ac94
MN
1887 s->dct_unquantize(s, temp, 0, s->qscale);
1888 }
1889
1890 s->idct_add(bak, stride, temp);
1891
1892 distoration= s->dsp.sse[1](NULL, bak, src1, stride);
1893
67725183 1894 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
1895}
1896
1897static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1898 MpegEncContext * const s= (MpegEncContext *)c;
1899 const UINT8 *scantable= s->intra_scantable.permutated;
76fbb024
MN
1900 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
1901 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3a87ac94
MN
1902 int i, last, run, bits, level, start_i;
1903 const int esc_length= s->ac_esc_length;
1904 uint8_t * length;
1905 uint8_t * last_length;
67725183
MN
1906
1907 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 1908
67725183
MN
1909 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1910
1911 bits=0;
3a87ac94
MN
1912
1913 if (s->mb_intra) {
67725183 1914 start_i = 1;
3a87ac94
MN
1915 length = s->intra_ac_vlc_length;
1916 last_length= s->intra_ac_vlc_last_length;
67725183 1917 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
1918 } else {
1919 start_i = 0;
1920 length = s->inter_ac_vlc_length;
1921 last_length= s->inter_ac_vlc_last_length;
1922 }
3a87ac94 1923
67725183 1924 if(last>=start_i){
3a87ac94
MN
1925 run=0;
1926 for(i=start_i; i<last; i++){
1927 int j= scantable[i];
1928 level= temp[j];
1929
1930 if(level){
1931 level+=64;
1932 if((level&(~127)) == 0){
1933 bits+= length[UNI_AC_ENC_INDEX(run, level)];
1934 }else
1935 bits+= esc_length;
1936 run=0;
1937 }else
1938 run++;
1939 }
1940 i= scantable[last];
67725183
MN
1941
1942 level= temp[i] + 64;
3a87ac94 1943
67725183 1944 assert(level - 64);
3a87ac94 1945
3a87ac94
MN
1946 if((level&(~127)) == 0){
1947 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
1948 }else
1949 bits+= esc_length;
1950 }
1951
1952 return bits;
1953}
1954
1955
1457ab52
MN
1956WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
1957WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
1958WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
3a87ac94
MN
1959WARPER88_1616(rd8x8_c, rd16x16_c)
1960WARPER88_1616(bit8x8_c, bit16x16_c)
1457ab52 1961
eb4b3dd3 1962void dsputil_init(DSPContext* c, unsigned mask)
e0eac44e 1963{
5abd509a 1964 static int init_done = 0;
d2975f8d 1965 int i;
e0eac44e 1966
5abd509a
ZK
1967 if (!init_done) {
1968 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
1969 for(i=0;i<MAX_NEG_CROP;i++) {
1970 cropTbl[i] = 0;
1971 cropTbl[i + MAX_NEG_CROP + 256] = 255;
1972 }
de6d9b64 1973
5abd509a
ZK
1974 for(i=0;i<512;i++) {
1975 squareTbl[i] = (i - 256) * (i - 256);
1976 }
92ddb692
ZK
1977
1978 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
1979
1980 init_done = 1;
de6d9b64
FB
1981 }
1982
eb4b3dd3
ZK
1983 c->get_pixels = get_pixels_c;
1984 c->diff_pixels = diff_pixels_c;
1985 c->put_pixels_clamped = put_pixels_clamped_c;
1986 c->add_pixels_clamped = add_pixels_clamped_c;
1987 c->gmc1 = gmc1_c;
1988 c->gmc = gmc_c;
1989 c->clear_blocks = clear_blocks_c;
1990 c->pix_sum = pix_sum_c;
1991 c->pix_norm1 = pix_norm1_c;
1457ab52
MN
1992 c->sse[0]= sse16_c;
1993 c->sse[1]= sse8_c;
eb4b3dd3 1994
45553457 1995 /* TODO [0] 16 [1] 8 */
eb4b3dd3
ZK
1996 c->pix_abs16x16 = pix_abs16x16_c;
1997 c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
1998 c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
1999 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
2000 c->pix_abs8x8 = pix_abs8x8_c;
2001 c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
2002 c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
2003 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
2004
45553457
ZK
2005#define dspfunc(PFX, IDX, NUM) \
2006 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
2007 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
2008 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
2009 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
2010
2011 dspfunc(put, 0, 16);
2012 dspfunc(put_no_rnd, 0, 16);
2013 dspfunc(put, 1, 8);
2014 dspfunc(put_no_rnd, 1, 8);
2015
2016 dspfunc(avg, 0, 16);
2017 dspfunc(avg_no_rnd, 0, 16);
2018 dspfunc(avg, 1, 8);
2019 dspfunc(avg_no_rnd, 1, 8);
2020#undef dspfunc
2021
2022#define dspfunc(PFX, IDX, NUM) \
2023 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2024 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2025 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2026 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2027 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2028 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2029 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2030 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2031 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2032 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2033 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2034 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2035 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2036 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2037 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2038 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2039
2040 dspfunc(put_qpel, 0, 16);
2041 dspfunc(put_no_rnd_qpel, 0, 16);
2042
2043 dspfunc(avg_qpel, 0, 16);
2044 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2045
2046 dspfunc(put_qpel, 1, 8);
2047 dspfunc(put_no_rnd_qpel, 1, 8);
2048
2049 dspfunc(avg_qpel, 1, 8);
2050 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2051#undef dspfunc
c9a2ebc4 2052
1457ab52
MN
2053 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
2054 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2055 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2056 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2057 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2058 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2059 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2060 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2061
2062 c->hadamard8_diff[0]= hadamard8_diff16_c;
2063 c->hadamard8_diff[1]= hadamard8_diff_c;
2064 c->hadamard8_abs = hadamard8_abs_c;
2065
2066 c->dct_sad[0]= dct_sad16x16_c;
2067 c->dct_sad[1]= dct_sad8x8_c;
2068
2069 c->sad[0]= sad16x16_c;
2070 c->sad[1]= sad8x8_c;
2071
2072 c->quant_psnr[0]= quant_psnr16x16_c;
2073 c->quant_psnr[1]= quant_psnr8x8_c;
3a87ac94
MN
2074
2075 c->rd[0]= rd16x16_c;
2076 c->rd[1]= rd8x8_c;
2077
2078 c->bit[0]= bit16x16_c;
2079 c->bit[1]= bit8x8_c;
2080
11f18faf
MN
2081 c->add_bytes= add_bytes_c;
2082 c->diff_bytes= diff_bytes_c;
2083
980fc7b8 2084#ifdef HAVE_MMX
eb4b3dd3 2085 dsputil_init_mmx(c, mask);
34dfe896
ZK
2086 if (ff_bit_exact)
2087 {
2088 /* FIXME - AVCodec context should have flag for bitexact match */
2089 /* fprintf(stderr, "\n\n\nff_bit_exact %d\n\n\n\n", ff_bit_exact); */
2090 dsputil_set_bit_exact_mmx(c, mask);
2091 }
de6d9b64 2092#endif
3d03c0a2 2093#ifdef ARCH_ARMV4L
eb4b3dd3 2094 dsputil_init_armv4l(c, mask);
3d03c0a2 2095#endif
c34270f5 2096#ifdef HAVE_MLIB
eb4b3dd3 2097 dsputil_init_mlib(c, mask);
c34270f5 2098#endif
1e98dffb 2099#ifdef ARCH_ALPHA
eb4b3dd3 2100 dsputil_init_alpha(c, mask);
1e98dffb 2101#endif
59925ef2 2102#ifdef ARCH_POWERPC
eb4b3dd3 2103 dsputil_init_ppc(c, mask);
a43bd1d7 2104#endif
d46aba26 2105#ifdef HAVE_MMI
eb4b3dd3 2106 dsputil_init_mmi(c, mask);
d46aba26 2107#endif
de6d9b64 2108}
43f1708f 2109
57060b1e
FB
2110/* remove any non bit exact operation (testing purpose) */
2111void avcodec_set_bit_exact(void)
2112{
5596c60c 2113 ff_bit_exact=1;
57060b1e 2114#ifdef HAVE_MMX
34dfe896 2115// FIXME - better set_bit_exact
eb4b3dd3 2116// dsputil_set_bit_exact_mmx();
57060b1e
FB
2117#endif
2118}