support for DV aspect ratio and erroneous audio patch by (Dan Dennedy (dan at dennedy...
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
ff4ec49e 3 * Copyright (c) 2000, 2001 Fabrice Bellard.
de6d9b64 4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
de6d9b64 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
de6d9b64 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
de6d9b64 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7ff037e9 18 *
59fe111e 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
de6d9b64 20 */
983e3246
MN
21
22/**
23 * @file dsputil.c
24 * DSP utils
25 */
26
de6d9b64
FB
27#include "avcodec.h"
28#include "dsputil.h"
1457ab52 29#include "mpegvideo.h"
b0368839 30#include "simple_idct.h"
45553457 31
5596c60c 32
0c1a9eda
ZK
/*
 * Lookup tables shared by the C routines in this file.
 *
 * cropTbl is always accessed through "cropTbl + MAX_NEG_CROP" by the clamped
 * put/add routines, so it tolerates indices MAX_NEG_CROP below 0 and
 * MAX_NEG_CROP above 255.
 * squareTbl is always accessed through "squareTbl + 256" by the SSE / norm
 * routines, with indices that are byte values or differences of two bytes
 * (-255..255).
 *
 * NOTE(review): neither table is filled in here; presumably both are
 * initialised at start-up elsewhere in the file -- confirm.
 */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
uint32_t squareTbl[512];
de6d9b64 35
/**
 * Zigzag scan order for an 8x8 block.
 * Entry i is the raster position (row * 8 + column) of the i-th coefficient
 * in scan order; the table is a permutation of 0..63.
 */
const uint8_t ff_zigzag_direct[64] = {
    /*  0.. 7 */  0,  1,  8, 16,  9,  2,  3, 10,
    /*  8..15 */ 17, 24, 32, 25, 18, 11,  4,  5,
    /* 16..23 */ 12, 19, 26, 33, 40, 48, 41, 34,
    /* 24..31 */ 27, 20, 13,  6,  7, 14, 21, 28,
    /* 32..39 */ 35, 42, 49, 56, 57, 50, 43, 36,
    /* 40..47 */ 29, 22, 15, 23, 30, 37, 44, 51,
    /* 48..55 */ 58, 59, 52, 45, 38, 31, 39, 46,
    /* 56..63 */ 53, 60, 61, 54, 47, 55, 62, 63
};
46
/*
 * Inverse of ff_zigzag_direct (not permutated), with 1 added to every entry;
 * consumed by the MMX quantizer.
 * NOTE(review): only storage is reserved here; the contents are presumably
 * computed at init time elsewhere -- confirm.
 */
uint16_t __align8 inv_zigzag_direct16[64];
2f349de2 49
/**
 * Alternate horizontal scan order for an 8x8 block.
 * Entry i is the raster position (row * 8 + column) of the i-th coefficient
 * in scan order; the table is a permutation of 0..63.
 */
const uint8_t ff_alternate_horizontal_scan[64] = {
    /*  0.. 7 */  0,  1,  2,  3,  8,  9, 16, 17,
    /*  8..15 */ 10, 11,  4,  5,  6,  7, 15, 14,
    /* 16..23 */ 13, 12, 19, 18, 24, 25, 32, 33,
    /* 24..31 */ 26, 27, 20, 21, 22, 23, 28, 29,
    /* 32..39 */ 30, 31, 34, 35, 40, 41, 48, 49,
    /* 40..47 */ 42, 43, 36, 37, 38, 39, 44, 45,
    /* 48..55 */ 46, 47, 50, 51, 56, 57, 58, 59,
    /* 56..63 */ 52, 53, 54, 55, 60, 61, 62, 63,
};
60
/**
 * Alternate vertical scan order for an 8x8 block.
 * Entry i is the raster position (row * 8 + column) of the i-th coefficient
 * in scan order; the table is a permutation of 0..63.
 */
const uint8_t ff_alternate_vertical_scan[64] = {
    /*  0.. 7 */  0,  8, 16, 24,  1,  9,  2, 10,
    /*  8..15 */ 17, 25, 32, 40, 48, 56, 57, 49,
    /* 16..23 */ 41, 33, 26, 18,  3, 11,  4, 12,
    /* 24..31 */ 19, 27, 34, 42, 50, 58, 35, 43,
    /* 32..39 */ 51, 59, 20, 28,  5, 13,  6, 14,
    /* 40..47 */ 21, 29, 36, 44, 52, 60, 37, 45,
    /* 48..55 */ 53, 61, 22, 30,  7, 15, 23, 31,
    /* 56..63 */ 38, 46, 54, 62, 39, 47, 55, 63,
};
71
/**
 * Fixed-point reciprocals for division by multiplication.
 *
 * For every 0 <= a <= 65536 and 2 <= b <= 255:
 *     ((uint64_t)a * inverse[b]) >> 32 == a / b
 * Entries 0 and 1 are outside that guarantee.
 */
const uint32_t inverse[256]={
    /*   0 */          0, 4294967295U, 2147483648U, 1431655766, 1073741824,  858993460,  715827883,  613566757,
    /*   8 */  536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
    /*  16 */  268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
    /*  24 */  178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
    /*  32 */  134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
    /*  40 */  107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
    /*  48 */   89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
    /*  56 */   76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
    /*  64 */   67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
    /*  72 */   59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
    /*  80 */   53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
    /*  88 */   48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
    /*  96 */   44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
    /* 104 */   41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
    /* 112 */   38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
    /* 120 */   35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
    /* 128 */   33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
    /* 136 */   31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
    /* 144 */   29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
    /* 152 */   28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
    /* 160 */   26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
    /* 168 */   25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
    /* 176 */   24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
    /* 184 */   23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
    /* 192 */   22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
    /* 200 */   21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
    /* 208 */   20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
    /* 216 */   19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
    /* 224 */   19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
    /* 232 */   18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
    /* 240 */   17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
    /* 248 */   17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
107
b0368839
MN
/**
 * Coefficient order expected on input by simple_idct_mmx.
 * The table is a permutation of 0..63 (0x00..0x3F).
 */
static const uint8_t simple_mmx_permutation[64]={
    /* 0x00 */ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    /* 0x08 */ 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    /* 0x10 */ 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    /* 0x18 */ 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    /* 0x20 */ 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    /* 0x28 */ 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    /* 0x30 */ 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    /* 0x38 */ 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
119
/**
 * Sum of all pixel values of a 16x16 block.
 *
 * @param pix        top-left pixel of the block
 * @param line_size  distance in bytes between the starts of two rows
 * @return the sum of the 256 pixels
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;   /* step to the next row */
    }
    return total;
}
141
0c1a9eda 142static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
143{
144 int s, i, j;
0c1a9eda 145 uint32_t *sq = squareTbl + 256;
3aa102be
MN
146
147 s = 0;
148 for (i = 0; i < 16; i++) {
149 for (j = 0; j < 16; j += 8) {
2a006cd3 150#if 0
3aa102be
MN
151 s += sq[pix[0]];
152 s += sq[pix[1]];
153 s += sq[pix[2]];
154 s += sq[pix[3]];
155 s += sq[pix[4]];
156 s += sq[pix[5]];
157 s += sq[pix[6]];
158 s += sq[pix[7]];
2a006cd3
FL
159#else
160#if LONG_MAX > 2147483647
161 register uint64_t x=*(uint64_t*)pix;
162 s += sq[x&0xff];
163 s += sq[(x>>8)&0xff];
164 s += sq[(x>>16)&0xff];
165 s += sq[(x>>24)&0xff];
166 s += sq[(x>>32)&0xff];
167 s += sq[(x>>40)&0xff];
168 s += sq[(x>>48)&0xff];
169 s += sq[(x>>56)&0xff];
170#else
171 register uint32_t x=*(uint32_t*)pix;
172 s += sq[x&0xff];
173 s += sq[(x>>8)&0xff];
174 s += sq[(x>>16)&0xff];
175 s += sq[(x>>24)&0xff];
176 x=*(uint32_t*)(pix+4);
177 s += sq[x&0xff];
178 s += sq[(x>>8)&0xff];
179 s += sq[(x>>16)&0xff];
180 s += sq[(x>>24)&0xff];
181#endif
182#endif
3aa102be
MN
183 pix += 8;
184 }
185 pix += line_size - 16;
186 }
187 return s;
188}
189
190
0c1a9eda 191static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
1457ab52
MN
192{
193 int s, i;
0c1a9eda 194 uint32_t *sq = squareTbl + 256;
1457ab52
MN
195
196 s = 0;
197 for (i = 0; i < 8; i++) {
198 s += sq[pix1[0] - pix2[0]];
199 s += sq[pix1[1] - pix2[1]];
200 s += sq[pix1[2] - pix2[2]];
201 s += sq[pix1[3] - pix2[3]];
202 s += sq[pix1[4] - pix2[4]];
203 s += sq[pix1[5] - pix2[5]];
204 s += sq[pix1[6] - pix2[6]];
205 s += sq[pix1[7] - pix2[7]];
206 pix1 += line_size;
207 pix2 += line_size;
208 }
209 return s;
210}
211
6b026927 212static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
9c76bd48 213{
6b026927
FH
214 int s, i;
215 uint32_t *sq = squareTbl + 256;
9c76bd48
BF
216
217 s = 0;
218 for (i = 0; i < 16; i++) {
6b026927
FH
219 s += sq[pix1[ 0] - pix2[ 0]];
220 s += sq[pix1[ 1] - pix2[ 1]];
221 s += sq[pix1[ 2] - pix2[ 2]];
222 s += sq[pix1[ 3] - pix2[ 3]];
223 s += sq[pix1[ 4] - pix2[ 4]];
224 s += sq[pix1[ 5] - pix2[ 5]];
225 s += sq[pix1[ 6] - pix2[ 6]];
226 s += sq[pix1[ 7] - pix2[ 7]];
227 s += sq[pix1[ 8] - pix2[ 8]];
228 s += sq[pix1[ 9] - pix2[ 9]];
229 s += sq[pix1[10] - pix2[10]];
230 s += sq[pix1[11] - pix2[11]];
231 s += sq[pix1[12] - pix2[12]];
232 s += sq[pix1[13] - pix2[13]];
233 s += sq[pix1[14] - pix2[14]];
234 s += sq[pix1[15] - pix2[15]];
2a006cd3 235
6b026927
FH
236 pix1 += line_size;
237 pix2 += line_size;
9c76bd48
BF
238 }
239 return s;
240}
241
0c1a9eda 242static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 243{
de6d9b64
FB
244 int i;
245
246 /* read the pixels */
de6d9b64 247 for(i=0;i<8;i++) {
c13e1abd
FH
248 block[0] = pixels[0];
249 block[1] = pixels[1];
250 block[2] = pixels[2];
251 block[3] = pixels[3];
252 block[4] = pixels[4];
253 block[5] = pixels[5];
254 block[6] = pixels[6];
255 block[7] = pixels[7];
256 pixels += line_size;
257 block += 8;
de6d9b64
FB
258 }
259}
260
0c1a9eda
ZK
261static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
262 const uint8_t *s2, int stride){
9dbcbd92
MN
263 int i;
264
265 /* read the pixels */
9dbcbd92 266 for(i=0;i<8;i++) {
c13e1abd
FH
267 block[0] = s1[0] - s2[0];
268 block[1] = s1[1] - s2[1];
269 block[2] = s1[2] - s2[2];
270 block[3] = s1[3] - s2[3];
271 block[4] = s1[4] - s2[4];
272 block[5] = s1[5] - s2[5];
273 block[6] = s1[6] - s2[6];
274 block[7] = s1[7] - s2[7];
9dbcbd92
MN
275 s1 += stride;
276 s2 += stride;
c13e1abd 277 block += 8;
9dbcbd92
MN
278 }
279}
280
281
0c1a9eda 282static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
eb4b3dd3 283 int line_size)
de6d9b64 284{
de6d9b64 285 int i;
0c1a9eda 286 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
287
288 /* read the pixels */
de6d9b64 289 for(i=0;i<8;i++) {
c13e1abd
FH
290 pixels[0] = cm[block[0]];
291 pixels[1] = cm[block[1]];
292 pixels[2] = cm[block[2]];
293 pixels[3] = cm[block[3]];
294 pixels[4] = cm[block[4]];
295 pixels[5] = cm[block[5]];
296 pixels[6] = cm[block[6]];
297 pixels[7] = cm[block[7]];
298
299 pixels += line_size;
300 block += 8;
de6d9b64
FB
301 }
302}
303
0c1a9eda 304static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 305 int line_size)
de6d9b64 306{
de6d9b64 307 int i;
0c1a9eda 308 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
309
310 /* read the pixels */
de6d9b64 311 for(i=0;i<8;i++) {
c13e1abd
FH
312 pixels[0] = cm[pixels[0] + block[0]];
313 pixels[1] = cm[pixels[1] + block[1]];
314 pixels[2] = cm[pixels[2] + block[2]];
315 pixels[3] = cm[pixels[3] + block[3]];
316 pixels[4] = cm[pixels[4] + block[4]];
317 pixels[5] = cm[pixels[5] + block[5]];
318 pixels[6] = cm[pixels[6] + block[6]];
319 pixels[7] = cm[pixels[7] + block[7]];
320 pixels += line_size;
321 block += 8;
de6d9b64
FB
322 }
323}
59fe111e
MN
324#if 0
325
326#define PIXOP2(OPNAME, OP) \
b3184779 327static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
328{\
329 int i;\
330 for(i=0; i<h; i++){\
331 OP(*((uint64_t*)block), LD64(pixels));\
332 pixels+=line_size;\
333 block +=line_size;\
334 }\
335}\
336\
45553457 337static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
338{\
339 int i;\
340 for(i=0; i<h; i++){\
341 const uint64_t a= LD64(pixels );\
342 const uint64_t b= LD64(pixels+1);\
343 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
344 pixels+=line_size;\
345 block +=line_size;\
346 }\
347}\
348\
45553457 349static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
350{\
351 int i;\
352 for(i=0; i<h; i++){\
353 const uint64_t a= LD64(pixels );\
354 const uint64_t b= LD64(pixels+1);\
355 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
356 pixels+=line_size;\
357 block +=line_size;\
358 }\
359}\
360\
45553457 361static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
362{\
363 int i;\
364 for(i=0; i<h; i++){\
365 const uint64_t a= LD64(pixels );\
366 const uint64_t b= LD64(pixels+line_size);\
367 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
368 pixels+=line_size;\
369 block +=line_size;\
370 }\
371}\
372\
45553457 373static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
374{\
375 int i;\
376 for(i=0; i<h; i++){\
377 const uint64_t a= LD64(pixels );\
378 const uint64_t b= LD64(pixels+line_size);\
379 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
380 pixels+=line_size;\
381 block +=line_size;\
382 }\
383}\
384\
45553457 385static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
386{\
387 int i;\
388 const uint64_t a= LD64(pixels );\
389 const uint64_t b= LD64(pixels+1);\
390 uint64_t l0= (a&0x0303030303030303ULL)\
391 + (b&0x0303030303030303ULL)\
392 + 0x0202020202020202ULL;\
393 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
394 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
395 uint64_t l1,h1;\
396\
397 pixels+=line_size;\
398 for(i=0; i<h; i+=2){\
399 uint64_t a= LD64(pixels );\
400 uint64_t b= LD64(pixels+1);\
401 l1= (a&0x0303030303030303ULL)\
402 + (b&0x0303030303030303ULL);\
403 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
404 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
405 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
406 pixels+=line_size;\
407 block +=line_size;\
408 a= LD64(pixels );\
409 b= LD64(pixels+1);\
410 l0= (a&0x0303030303030303ULL)\
411 + (b&0x0303030303030303ULL)\
412 + 0x0202020202020202ULL;\
413 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
414 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
415 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
416 pixels+=line_size;\
417 block +=line_size;\
418 }\
419}\
420\
45553457 421static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
422{\
423 int i;\
424 const uint64_t a= LD64(pixels );\
425 const uint64_t b= LD64(pixels+1);\
426 uint64_t l0= (a&0x0303030303030303ULL)\
427 + (b&0x0303030303030303ULL)\
428 + 0x0101010101010101ULL;\
429 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
430 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
431 uint64_t l1,h1;\
432\
433 pixels+=line_size;\
434 for(i=0; i<h; i+=2){\
435 uint64_t a= LD64(pixels );\
436 uint64_t b= LD64(pixels+1);\
437 l1= (a&0x0303030303030303ULL)\
438 + (b&0x0303030303030303ULL);\
439 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
440 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
441 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
442 pixels+=line_size;\
443 block +=line_size;\
444 a= LD64(pixels );\
445 b= LD64(pixels+1);\
446 l0= (a&0x0303030303030303ULL)\
447 + (b&0x0303030303030303ULL)\
448 + 0x0101010101010101ULL;\
449 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
450 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
451 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
452 pixels+=line_size;\
453 block +=line_size;\
454 }\
455}\
456\
45553457
ZK
457CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
458CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
459CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
460CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
461CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
462CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
463CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
464
465#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
466#else // 64 bit variant
467
468#define PIXOP2(OPNAME, OP) \
45553457 469static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
59fe111e
MN
470 int i;\
471 for(i=0; i<h; i++){\
472 OP(*((uint32_t*)(block )), LD32(pixels ));\
473 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
474 pixels+=line_size;\
475 block +=line_size;\
476 }\
477}\
45553457
ZK
478static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
479 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
b3184779 480}\
59fe111e 481\
b3184779
MN
482static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
483 int src_stride1, int src_stride2, int h){\
59fe111e
MN
484 int i;\
485 for(i=0; i<h; i++){\
b3184779
MN
486 uint32_t a,b;\
487 a= LD32(&src1[i*src_stride1 ]);\
488 b= LD32(&src2[i*src_stride2 ]);\
489 OP(*((uint32_t*)&dst[i*dst_stride ]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
490 a= LD32(&src1[i*src_stride1+4]);\
491 b= LD32(&src2[i*src_stride2+4]);\
492 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
59fe111e
MN
493 }\
494}\
495\
b3184779
MN
496static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
497 int src_stride1, int src_stride2, int h){\
59fe111e
MN
498 int i;\
499 for(i=0; i<h; i++){\
b3184779
MN
500 uint32_t a,b;\
501 a= LD32(&src1[i*src_stride1 ]);\
502 b= LD32(&src2[i*src_stride2 ]);\
503 OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
504 a= LD32(&src1[i*src_stride1+4]);\
505 b= LD32(&src2[i*src_stride2+4]);\
506 OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
59fe111e
MN
507 }\
508}\
509\
b3184779
MN
510static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
511 int src_stride1, int src_stride2, int h){\
512 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
513 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
514}\
515\
516static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
517 int src_stride1, int src_stride2, int h){\
518 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
519 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
520}\
521\
45553457 522static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
523 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
524}\
525\
45553457 526static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
527 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
528}\
529\
45553457 530static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
531 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
532}\
533\
45553457 534static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
535 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
536}\
537\
538static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
539 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
540 int i;\
541 for(i=0; i<h; i++){\
b3184779
MN
542 uint32_t a, b, c, d, l0, l1, h0, h1;\
543 a= LD32(&src1[i*src_stride1]);\
544 b= LD32(&src2[i*src_stride2]);\
545 c= LD32(&src3[i*src_stride3]);\
546 d= LD32(&src4[i*src_stride4]);\
547 l0= (a&0x03030303UL)\
548 + (b&0x03030303UL)\
549 + 0x02020202UL;\
550 h0= ((a&0xFCFCFCFCUL)>>2)\
551 + ((b&0xFCFCFCFCUL)>>2);\
552 l1= (c&0x03030303UL)\
553 + (d&0x03030303UL);\
554 h1= ((c&0xFCFCFCFCUL)>>2)\
555 + ((d&0xFCFCFCFCUL)>>2);\
556 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
557 a= LD32(&src1[i*src_stride1+4]);\
558 b= LD32(&src2[i*src_stride2+4]);\
559 c= LD32(&src3[i*src_stride3+4]);\
560 d= LD32(&src4[i*src_stride4+4]);\
561 l0= (a&0x03030303UL)\
562 + (b&0x03030303UL)\
563 + 0x02020202UL;\
564 h0= ((a&0xFCFCFCFCUL)>>2)\
565 + ((b&0xFCFCFCFCUL)>>2);\
566 l1= (c&0x03030303UL)\
567 + (d&0x03030303UL);\
568 h1= ((c&0xFCFCFCFCUL)>>2)\
569 + ((d&0xFCFCFCFCUL)>>2);\
570 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
571 }\
572}\
b3184779
MN
573static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
574 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
575 int i;\
576 for(i=0; i<h; i++){\
b3184779
MN
577 uint32_t a, b, c, d, l0, l1, h0, h1;\
578 a= LD32(&src1[i*src_stride1]);\
579 b= LD32(&src2[i*src_stride2]);\
580 c= LD32(&src3[i*src_stride3]);\
581 d= LD32(&src4[i*src_stride4]);\
582 l0= (a&0x03030303UL)\
583 + (b&0x03030303UL)\
584 + 0x01010101UL;\
585 h0= ((a&0xFCFCFCFCUL)>>2)\
586 + ((b&0xFCFCFCFCUL)>>2);\
587 l1= (c&0x03030303UL)\
588 + (d&0x03030303UL);\
589 h1= ((c&0xFCFCFCFCUL)>>2)\
590 + ((d&0xFCFCFCFCUL)>>2);\
591 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
592 a= LD32(&src1[i*src_stride1+4]);\
593 b= LD32(&src2[i*src_stride2+4]);\
594 c= LD32(&src3[i*src_stride3+4]);\
595 d= LD32(&src4[i*src_stride4+4]);\
596 l0= (a&0x03030303UL)\
597 + (b&0x03030303UL)\
598 + 0x01010101UL;\
599 h0= ((a&0xFCFCFCFCUL)>>2)\
600 + ((b&0xFCFCFCFCUL)>>2);\
601 l1= (c&0x03030303UL)\
602 + (d&0x03030303UL);\
603 h1= ((c&0xFCFCFCFCUL)>>2)\
604 + ((d&0xFCFCFCFCUL)>>2);\
605 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
606 }\
607}\
b3184779
MN
608static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
609 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
610 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
611 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
612}\
613static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
614 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
615 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
616 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
617}\
59fe111e 618\
45553457 619static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
620{\
621 int j;\
622 for(j=0; j<2; j++){\
623 int i;\
624 const uint32_t a= LD32(pixels );\
625 const uint32_t b= LD32(pixels+1);\
626 uint32_t l0= (a&0x03030303UL)\
627 + (b&0x03030303UL)\
628 + 0x02020202UL;\
629 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
630 + ((b&0xFCFCFCFCUL)>>2);\
631 uint32_t l1,h1;\
632\
633 pixels+=line_size;\
634 for(i=0; i<h; i+=2){\
635 uint32_t a= LD32(pixels );\
636 uint32_t b= LD32(pixels+1);\
637 l1= (a&0x03030303UL)\
638 + (b&0x03030303UL);\
639 h1= ((a&0xFCFCFCFCUL)>>2)\
640 + ((b&0xFCFCFCFCUL)>>2);\
641 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
642 pixels+=line_size;\
643 block +=line_size;\
644 a= LD32(pixels );\
645 b= LD32(pixels+1);\
646 l0= (a&0x03030303UL)\
647 + (b&0x03030303UL)\
648 + 0x02020202UL;\
649 h0= ((a&0xFCFCFCFCUL)>>2)\
650 + ((b&0xFCFCFCFCUL)>>2);\
651 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
652 pixels+=line_size;\
653 block +=line_size;\
654 }\
655 pixels+=4-line_size*(h+1);\
656 block +=4-line_size*h;\
657 }\
658}\
659\
45553457 660static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
661{\
662 int j;\
663 for(j=0; j<2; j++){\
664 int i;\
665 const uint32_t a= LD32(pixels );\
666 const uint32_t b= LD32(pixels+1);\
667 uint32_t l0= (a&0x03030303UL)\
668 + (b&0x03030303UL)\
669 + 0x01010101UL;\
670 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
671 + ((b&0xFCFCFCFCUL)>>2);\
672 uint32_t l1,h1;\
673\
674 pixels+=line_size;\
675 for(i=0; i<h; i+=2){\
676 uint32_t a= LD32(pixels );\
677 uint32_t b= LD32(pixels+1);\
678 l1= (a&0x03030303UL)\
679 + (b&0x03030303UL);\
680 h1= ((a&0xFCFCFCFCUL)>>2)\
681 + ((b&0xFCFCFCFCUL)>>2);\
682 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
683 pixels+=line_size;\
684 block +=line_size;\
685 a= LD32(pixels );\
686 b= LD32(pixels+1);\
687 l0= (a&0x03030303UL)\
688 + (b&0x03030303UL)\
689 + 0x01010101UL;\
690 h0= ((a&0xFCFCFCFCUL)>>2)\
691 + ((b&0xFCFCFCFCUL)>>2);\
692 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
693 pixels+=line_size;\
694 block +=line_size;\
695 }\
696 pixels+=4-line_size*(h+1);\
697 block +=4-line_size*h;\
698 }\
699}\
700\
45553457
ZK
701CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
702CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
703CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
704CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
705CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
706CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
707CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
708CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
b3184779 709
59fe111e
MN
710#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
711#endif
59fe111e
MN
712#define op_put(a, b) a = b
713
714PIXOP2(avg, op_avg)
715PIXOP2(put, op_put)
716#undef op_avg
717#undef op_put
718
de6d9b64
FB
/*
 * Rounded integer averages of two and of four values.
 * Every argument is parenthesized so that expressions containing operators
 * of lower precedence than '+' (for example "x|y" or "c ? a : b") are
 * averaged as a whole.  Each argument is evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
721
073b013d 722
/**
 * Bilinear interpolation of an 8 pixel wide block at a fixed sub-pixel
 * offset given in 1/16 pixel units.
 *
 * Each output pixel is the weighted sum of the four source pixels around
 * it; the four weights always add up to 256, hence the final shift by 8.
 *
 * @param dst      destination block
 * @param src      source block; 9 pixels per row and h+1 rows are read
 * @param stride   row stride in bytes, shared by dst and src
 * @param h        number of rows to produce
 * @param x16      horizontal offset in 1/16 pixels
 * @param y16      vertical offset in 1/16 pixels
 * @param rounder  value added before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_top_left  = (16 - x16) * (16 - y16);
    const int w_top_right = x16        * (16 - y16);
    const int w_bot_left  = (16 - x16) * y16;
    const int w_bot_right = x16        * y16;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_top_left  * src[col]
                      + w_top_right * src[col + 1]
                      + w_bot_left  * src[stride + col]
                      + w_bot_right * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
745
/**
 * Motion compensation of an 8 pixel wide block where the source position of
 * every pixel is stepped by a per-column and a per-row increment.
 *
 * The position accumulators carry 16 extra fractional bits; after dropping
 * those, the low "shift" bits are the bilinear fraction and the remaining
 * bits the integer source coordinate.  Coordinates that fall outside
 * 0..width-2 / 0..height-2 are clipped to the picture and interpolation
 * along that axis is skipped.
 *
 * @param dst     destination block
 * @param src     source picture
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows to produce
 * @param ox, oy  source position of the first pixel
 * @param dxx     increment of the x position per output column
 * @param dxy     increment of the x position per output row
 * @param dyx     increment of the y position per output column
 * @param dyy     increment of the y position per output row
 * @param shift   number of fractional bits used for interpolation
 * @param r       value added before the final shift
 * @param width   source picture width
 * @param height  source picture height
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int one   = 1 << shift;
    const int max_x = width - 1;    /* last valid column */
    const int max_y = height - 1;   /* last valid row */
    int row;

    for (row = 0; row < h; row++) {
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) {
            int sx = vx >> 16;
            int sy = vy >> 16;
            const int fx = sx & (one - 1);
            const int fy = sy & (one - 1);
            int x_inside, y_inside, pos;

            sx >>= shift;
            sy >>= shift;

            /* the unsigned compare rejects negative coordinates as well */
            x_inside = (unsigned)sx < (unsigned)max_x;
            y_inside = (unsigned)sy < (unsigned)max_y;

            if (x_inside && y_inside) {
                /* full bilinear interpolation */
                pos = sx + sy * stride;
                dst[row * stride + col] =
                    ( ( src[pos             ] * (one - fx)
                      + src[pos          + 1] * fx         ) * (one - fy)
                    + ( src[pos + stride    ] * (one - fx)
                      + src[pos + stride + 1] * fx         ) * fy
                    + r) >> (shift * 2);
            } else if (x_inside) {
                /* row clipped: interpolate horizontally only */
                pos = sx + clip(sy, 0, max_y) * stride;
                dst[row * stride + col] =
                    ( ( src[pos    ] * (one - fx)
                      + src[pos + 1] * fx         ) * one
                    + r) >> (shift * 2);
            } else if (y_inside) {
                /* column clipped: interpolate vertically only */
                pos = clip(sx, 0, max_x) + sy * stride;
                dst[row * stride + col] =
                    ( ( src[pos         ] * (one - fy)
                      + src[pos + stride] * fy         ) * one
                    + r) >> (shift * 2);
            } else {
                /* both clipped: nearest picture pixel */
                pos = clip(sx, 0, max_x) + clip(sy, 0, max_y) * stride;
                dst[row * stride + col] = src[pos];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
803
/**
 * Copy a block that is 17 pixels wide and h rows high.
 * Each row is moved as four 32 bit words followed by one single byte.
 *
 * @param dst        destination block
 * @param src        source block
 * @param dstStride  destination row stride in bytes
 * @param srcStride  source row stride in bytes
 * @param h          number of rows
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row, off;

    for (row = 0; row < h; row++) {
        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[16] = src[16];      /* the 17th pixel */
        dst += dstStride;
        src += srcStride;
    }
}
818
/**
 * Copy a block that is 9 pixels wide and h rows high.
 * Each row is moved as two 32 bit words followed by one single byte.
 *
 * @param dst        destination block
 * @param src        source block
 * @param dstStride  destination row stride in bytes
 * @param srcStride  source row stride in bytes
 * @param h          number of rows
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row, off;

    for (row = 0; row < h; row++) {
        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[8] = src[8];        /* the 9th pixel */
        dst += dstStride;
        src += srcStride;
    }
}
831
826f429a 832
b3184779 833#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda
ZK
834static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
835 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
836 int i;\
837 for(i=0; i<h; i++)\
838 {\
839 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
840 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
841 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
842 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
843 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
844 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
845 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
846 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
847 dst+=dstStride;\
848 src+=srcStride;\
849 }\
44eb4951
MN
850}\
851\
0c1a9eda 852static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 853 const int w=8;\
0c1a9eda 854 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
855 int i;\
856 for(i=0; i<w; i++)\
857 {\
858 const int src0= src[0*srcStride];\
859 const int src1= src[1*srcStride];\
860 const int src2= src[2*srcStride];\
861 const int src3= src[3*srcStride];\
862 const int src4= src[4*srcStride];\
863 const int src5= src[5*srcStride];\
864 const int src6= src[6*srcStride];\
865 const int src7= src[7*srcStride];\
866 const int src8= src[8*srcStride];\
867 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
868 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
869 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
870 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
871 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
872 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
873 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
874 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
875 dst++;\
876 src++;\
877 }\
878}\
879\
0c1a9eda
ZK
880static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
881 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 882 int i;\
826f429a 883 \
b3184779
MN
884 for(i=0; i<h; i++)\
885 {\
886 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
887 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
888 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
889 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
890 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
891 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
892 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
893 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
894 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
895 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
896 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
897 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
898 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
899 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
900 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
901 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
902 dst+=dstStride;\
903 src+=srcStride;\
904 }\
905}\
906\
0c1a9eda
ZK
907static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
908 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 909 int i;\
826f429a 910 const int w=16;\
b3184779
MN
911 for(i=0; i<w; i++)\
912 {\
913 const int src0= src[0*srcStride];\
914 const int src1= src[1*srcStride];\
915 const int src2= src[2*srcStride];\
916 const int src3= src[3*srcStride];\
917 const int src4= src[4*srcStride];\
918 const int src5= src[5*srcStride];\
919 const int src6= src[6*srcStride];\
920 const int src7= src[7*srcStride];\
921 const int src8= src[8*srcStride];\
922 const int src9= src[9*srcStride];\
923 const int src10= src[10*srcStride];\
924 const int src11= src[11*srcStride];\
925 const int src12= src[12*srcStride];\
926 const int src13= src[13*srcStride];\
927 const int src14= src[14*srcStride];\
928 const int src15= src[15*srcStride];\
929 const int src16= src[16*srcStride];\
930 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
931 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
932 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
933 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
934 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
935 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
936 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
937 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
938 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
939 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
940 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
941 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
942 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
943 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
944 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
945 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
946 dst++;\
947 src++;\
948 }\
949}\
950\
0c1a9eda 951static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 952 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
953}\
954\
0c1a9eda
ZK
955static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
956 uint8_t half[64];\
b3184779
MN
957 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
958 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
959}\
960\
0c1a9eda 961static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 962 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
963}\
964\
0c1a9eda
ZK
965static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
966 uint8_t half[64];\
b3184779
MN
967 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
968 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
969}\
970\
0c1a9eda
ZK
971static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
972 uint8_t full[16*9];\
973 uint8_t half[64];\
b3184779 974 copy_block9(full, src, 16, stride, 9);\
db794953 975 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 976 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
977}\
978\
0c1a9eda
ZK
979static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
980 uint8_t full[16*9];\
b3184779 981 copy_block9(full, src, 16, stride, 9);\
db794953 982 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
983}\
984\
0c1a9eda
ZK
985static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
986 uint8_t full[16*9];\
987 uint8_t half[64];\
b3184779 988 copy_block9(full, src, 16, stride, 9);\
db794953 989 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 990 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 991}\
0c1a9eda
ZK
992void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
993 uint8_t full[16*9];\
994 uint8_t halfH[72];\
995 uint8_t halfV[64];\
996 uint8_t halfHV[64];\
b3184779
MN
997 copy_block9(full, src, 16, stride, 9);\
998 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
999 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1000 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1001 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1002}\
0c1a9eda
ZK
1003static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1004 uint8_t full[16*9];\
1005 uint8_t halfH[72];\
1006 uint8_t halfHV[64];\
db794953
MN
1007 copy_block9(full, src, 16, stride, 9);\
1008 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1009 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1010 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1011 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1012}\
0c1a9eda
ZK
1013void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1014 uint8_t full[16*9];\
1015 uint8_t halfH[72];\
1016 uint8_t halfV[64];\
1017 uint8_t halfHV[64];\
b3184779
MN
1018 copy_block9(full, src, 16, stride, 9);\
1019 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1020 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1021 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1022 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1023}\
0c1a9eda
ZK
1024static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1025 uint8_t full[16*9];\
1026 uint8_t halfH[72];\
1027 uint8_t halfHV[64];\
db794953
MN
1028 copy_block9(full, src, 16, stride, 9);\
1029 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1030 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1031 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1032 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1033}\
0c1a9eda
ZK
1034void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1035 uint8_t full[16*9];\
1036 uint8_t halfH[72];\
1037 uint8_t halfV[64];\
1038 uint8_t halfHV[64];\
b3184779
MN
1039 copy_block9(full, src, 16, stride, 9);\
1040 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1041 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1042 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1043 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1044}\
0c1a9eda
ZK
1045static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1046 uint8_t full[16*9];\
1047 uint8_t halfH[72];\
1048 uint8_t halfHV[64];\
db794953
MN
1049 copy_block9(full, src, 16, stride, 9);\
1050 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1051 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1052 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1053 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1054}\
0c1a9eda
ZK
1055void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1056 uint8_t full[16*9];\
1057 uint8_t halfH[72];\
1058 uint8_t halfV[64];\
1059 uint8_t halfHV[64];\
b3184779
MN
1060 copy_block9(full, src, 16, stride, 9);\
1061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1062 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1063 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1064 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1065}\
0c1a9eda
ZK
1066static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1067 uint8_t full[16*9];\
1068 uint8_t halfH[72];\
1069 uint8_t halfHV[64];\
db794953
MN
1070 copy_block9(full, src, 16, stride, 9);\
1071 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1072 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1073 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1074 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1075}\
0c1a9eda
ZK
1076static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1077 uint8_t halfH[72];\
1078 uint8_t halfHV[64];\
b3184779 1079 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1080 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1081 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1082}\
0c1a9eda
ZK
1083static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1084 uint8_t halfH[72];\
1085 uint8_t halfHV[64];\
b3184779 1086 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1087 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1088 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1089}\
0c1a9eda
ZK
1090void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1091 uint8_t full[16*9];\
1092 uint8_t halfH[72];\
1093 uint8_t halfV[64];\
1094 uint8_t halfHV[64];\
b3184779
MN
1095 copy_block9(full, src, 16, stride, 9);\
1096 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1097 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1098 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1099 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1100}\
0c1a9eda
ZK
1101static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1102 uint8_t full[16*9];\
1103 uint8_t halfH[72];\
db794953
MN
1104 copy_block9(full, src, 16, stride, 9);\
1105 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1106 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1107 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1108}\
0c1a9eda
ZK
1109void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1110 uint8_t full[16*9];\
1111 uint8_t halfH[72];\
1112 uint8_t halfV[64];\
1113 uint8_t halfHV[64];\
b3184779
MN
1114 copy_block9(full, src, 16, stride, 9);\
1115 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1116 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1117 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1118 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1119}\
0c1a9eda
ZK
1120static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1121 uint8_t full[16*9];\
1122 uint8_t halfH[72];\
db794953
MN
1123 copy_block9(full, src, 16, stride, 9);\
1124 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1125 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1126 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1127}\
0c1a9eda
ZK
1128static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1129 uint8_t halfH[72];\
b3184779 1130 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1131 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1132}\
0c1a9eda 1133static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1134 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1135}\
1136\
0c1a9eda
ZK
1137static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1138 uint8_t half[256];\
b3184779
MN
1139 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1140 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1141}\
1142\
0c1a9eda 1143static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1144 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1145}\
b3184779 1146\
0c1a9eda
ZK
1147static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1148 uint8_t half[256];\
b3184779
MN
1149 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1150 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1151}\
1152\
0c1a9eda
ZK
1153static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1154 uint8_t full[24*17];\
1155 uint8_t half[256];\
b3184779 1156 copy_block17(full, src, 24, stride, 17);\
826f429a 1157 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1158 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1159}\
1160\
0c1a9eda
ZK
1161static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1162 uint8_t full[24*17];\
b3184779 1163 copy_block17(full, src, 24, stride, 17);\
826f429a 1164 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1165}\
1166\
0c1a9eda
ZK
1167static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1168 uint8_t full[24*17];\
1169 uint8_t half[256];\
b3184779 1170 copy_block17(full, src, 24, stride, 17);\
826f429a 1171 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1172 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1173}\
0c1a9eda
ZK
1174void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1175 uint8_t full[24*17];\
1176 uint8_t halfH[272];\
1177 uint8_t halfV[256];\
1178 uint8_t halfHV[256];\
b3184779
MN
1179 copy_block17(full, src, 24, stride, 17);\
1180 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1181 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1182 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1183 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1184}\
0c1a9eda
ZK
1185static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1186 uint8_t full[24*17];\
1187 uint8_t halfH[272];\
1188 uint8_t halfHV[256];\
db794953
MN
1189 copy_block17(full, src, 24, stride, 17);\
1190 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1191 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1192 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1193 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1194}\
0c1a9eda
ZK
1195void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1196 uint8_t full[24*17];\
1197 uint8_t halfH[272];\
1198 uint8_t halfV[256];\
1199 uint8_t halfHV[256];\
b3184779
MN
1200 copy_block17(full, src, 24, stride, 17);\
1201 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1202 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1203 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1204 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1205}\
0c1a9eda
ZK
1206static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1207 uint8_t full[24*17];\
1208 uint8_t halfH[272];\
1209 uint8_t halfHV[256];\
db794953
MN
1210 copy_block17(full, src, 24, stride, 17);\
1211 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1212 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1213 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1214 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1215}\
0c1a9eda
ZK
1216void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1217 uint8_t full[24*17];\
1218 uint8_t halfH[272];\
1219 uint8_t halfV[256];\
1220 uint8_t halfHV[256];\
b3184779
MN
1221 copy_block17(full, src, 24, stride, 17);\
1222 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1223 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1224 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1225 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1226}\
0c1a9eda
ZK
1227static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1228 uint8_t full[24*17];\
1229 uint8_t halfH[272];\
1230 uint8_t halfHV[256];\
db794953
MN
1231 copy_block17(full, src, 24, stride, 17);\
1232 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1233 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1234 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1235 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1236}\
0c1a9eda
ZK
1237void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1238 uint8_t full[24*17];\
1239 uint8_t halfH[272];\
1240 uint8_t halfV[256];\
1241 uint8_t halfHV[256];\
b3184779
MN
1242 copy_block17(full, src, 24, stride, 17);\
1243 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1244 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1245 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1246 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1247}\
0c1a9eda
ZK
1248static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1249 uint8_t full[24*17];\
1250 uint8_t halfH[272];\
1251 uint8_t halfHV[256];\
db794953
MN
1252 copy_block17(full, src, 24, stride, 17);\
1253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1254 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1255 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1256 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1257}\
0c1a9eda
ZK
1258static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1259 uint8_t halfH[272];\
1260 uint8_t halfHV[256];\
b3184779 1261 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1262 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1263 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1264}\
0c1a9eda
ZK
1265static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1266 uint8_t halfH[272];\
1267 uint8_t halfHV[256];\
b3184779 1268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1269 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1270 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1271}\
0c1a9eda
ZK
1272void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1273 uint8_t full[24*17];\
1274 uint8_t halfH[272];\
1275 uint8_t halfV[256];\
1276 uint8_t halfHV[256];\
b3184779
MN
1277 copy_block17(full, src, 24, stride, 17);\
1278 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1279 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1280 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1281 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1282}\
0c1a9eda
ZK
1283static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1284 uint8_t full[24*17];\
1285 uint8_t halfH[272];\
db794953
MN
1286 copy_block17(full, src, 24, stride, 17);\
1287 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1288 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1289 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1290}\
0c1a9eda
ZK
1291void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1292 uint8_t full[24*17];\
1293 uint8_t halfH[272];\
1294 uint8_t halfV[256];\
1295 uint8_t halfHV[256];\
b3184779
MN
1296 copy_block17(full, src, 24, stride, 17);\
1297 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1298 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1299 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1300 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1301}\
0c1a9eda
ZK
1302static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1303 uint8_t full[24*17];\
1304 uint8_t halfH[272];\
db794953
MN
1305 copy_block17(full, src, 24, stride, 17);\
1306 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1307 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1308 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1309}\
0c1a9eda
ZK
1310static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1311 uint8_t halfH[272];\
b3184779 1312 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1313 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1314}
44eb4951 1315
b3184779
MN
1316#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1317#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1318#define op_put(a, b) a = cm[((b) + 16)>>5]
1319#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1320
1321QPEL_MC(0, put_ , _ , op_put)
1322QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1323QPEL_MC(0, avg_ , _ , op_avg)
1324//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1325#undef op_avg
1326#undef op_avg_no_rnd
1327#undef op_put
1328#undef op_put_no_rnd
44eb4951 1329
1457ab52
MN
1330static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1331 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1332 int i;
1333
1334 for(i=0; i<h; i++){
1335 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1336 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1337 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1338 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1339 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1340 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1341 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1342 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1343 dst+=dstStride;
1344 src+=srcStride;
1345 }
1346}
1347
1348static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1349 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1350 int i;
1351
1352 for(i=0; i<w; i++){
1353 const int src_1= src[ -srcStride];
1354 const int src0 = src[0 ];
1355 const int src1 = src[ srcStride];
1356 const int src2 = src[2*srcStride];
1357 const int src3 = src[3*srcStride];
1358 const int src4 = src[4*srcStride];
1359 const int src5 = src[5*srcStride];
1360 const int src6 = src[6*srcStride];
1361 const int src7 = src[7*srcStride];
1362 const int src8 = src[8*srcStride];
1363 const int src9 = src[9*srcStride];
1364 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1365 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1366 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1367 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1368 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1369 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1370 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1371 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1372 src++;
1373 dst++;
1374 }
1375}
1376
/**
 * mspel 8x8 case (0,0): no filtering, forwards to put_pixels8_c
 * with a final argument of 8 (presumably the block height).
 */
static void put_mspel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
1380
/**
 * mspel 8x8 case (1,0): horizontally filters the block into a packed
 * 8x8 scratch buffer and hands it together with the unfiltered source to
 * put_pixels8_l2 (presumably averaging the two into dst).
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64]; /* 8 rows, stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
1386
/**
 * mspel 8x8 case (2,0): the horizontal lowpass result of 8 rows is
 * written straight to dst.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1390
/**
 * mspel 8x8 case (3,0): like the (1,0) case, but the horizontally
 * filtered block is combined with the source shifted one pixel to the
 * right (src+1).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64]; /* 8 rows, stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, filtered, stride, stride, 8, 8);
}
1396
/**
 * mspel 8x8 case (0,2): the vertical lowpass result of 8 columns is
 * written straight to dst.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1400
/**
 * mspel 8x8 case (1,2): combines the vertically filtered source with the
 * horizontally-then-vertically filtered source via put_pixels8_l2.
 *
 * The horizontal pass starts one row above the block and covers 11 rows,
 * because the vertical filter needs one row above and two rows below the
 * 8 output rows.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hbuf[88];  /* 11 rows of 8: horizontal pass */
    uint8_t vbuf[64];  /* vertical pass on the plain source */
    uint8_t hvbuf[64]; /* vertical pass on hbuf */

    wmv2_mspel8_h_lowpass(hbuf, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src, 8, stride, 8);
    /* hbuf+8 skips the extra top row so it lines up with src */
    wmv2_mspel8_v_lowpass(hvbuf, hbuf+8, 8, 8, 8);
    put_pixels8_l2(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/**
 * mspel 8x8 case (3,2): same as the (1,2) case except that the purely
 * vertical pass runs on the source shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hbuf[88];  /* 11 rows of 8: horizontal pass */
    uint8_t vbuf[64];  /* vertical pass on src+1 */
    uint8_t hvbuf[64]; /* vertical pass on hbuf */

    wmv2_mspel8_h_lowpass(hbuf, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src+1, 8, stride, 8);
    /* hbuf+8 skips the extra top row so it lines up with src */
    wmv2_mspel8_v_lowpass(hvbuf, hbuf+8, 8, 8, 8);
    put_pixels8_l2(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/**
 * mspel 8x8 case (2,2): horizontal lowpass over 11 rows (starting one row
 * above the block) into a scratch buffer, then vertical lowpass of that
 * buffer directly into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hbuf[88]; /* 11 rows of 8 */

    wmv2_mspel8_h_lowpass(hbuf, src-stride, 8, stride, 11);
    /* hbuf+8 is the row that corresponds to src itself */
    wmv2_mspel8_v_lowpass(dst, hbuf+8, stride, 8, 8);
}
1424
1425
/**
 * Sum of absolute differences between two 16x16 pixel blocks.
 *
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size byte step between consecutive rows, shared by both blocks
 * @return the sum of |pix1 - pix2| over all 256 pixel positions
 */
static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for(row=0; row<16; row++) {
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1453
/**
 * Sum of absolute differences between a 16x16 block and a reference block
 * interpolated horizontally: each reference sample is avg2() of a pixel and
 * its right-hand neighbour (avg2 is defined elsewhere in this file).
 * Reads 17 pixels per reference row.
 *
 * @param pix1      top-left pixel of the block to match
 * @param pix2      top-left pixel of the reference block
 * @param line_size byte step between consecutive rows, shared by both blocks
 * @return the accumulated absolute difference over 16x16 positions
 */
static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for(row=0; row<16; row++) {
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1481
/**
 * Sum of absolute differences between a 16x16 block and a reference block
 * interpolated vertically: each reference sample is avg2() of a pixel and
 * the pixel one row below it (avg2 is defined elsewhere in this file).
 * Reads 17 reference rows.
 *
 * @param pix1      top-left pixel of the block to match
 * @param pix2      top-left pixel of the reference block
 * @param line_size byte step between consecutive rows, shared by both blocks
 * @return the accumulated absolute difference over 16x16 positions
 */
static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size; /* reference row under the current one */
    int sad = 0;
    int row, col;

    for(row=0; row<16; row++) {
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
1511
/**
 * Sum of absolute differences between a 16x16 block and a reference block
 * interpolated in both directions: each reference sample is avg4() of a
 * pixel, its right neighbour and the two pixels below those (avg4 is
 * defined elsewhere in this file). Reads 17x17 reference pixels.
 *
 * @param pix1      top-left pixel of the block to match
 * @param pix2      top-left pixel of the reference block
 * @param line_size byte step between consecutive rows, shared by both blocks
 * @return the accumulated absolute difference over 16x16 positions
 */
static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size; /* reference row under the current one */
    int sad = 0;
    int row, col;

    for(row=0; row<16; row++) {
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
1541
/**
 * Sum of absolute differences between two 8x8 blocks.
 * @param pix1 first block
 * @param pix2 second block
 * @param line_size distance in bytes between two rows, used for both blocks
 * @return sum over the 64 samples of |pix1 - pix2|
 */
static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        const uint8_t *p = pix1 + row * line_size;
        const uint8_t *q = pix2 + row * line_size;

        for (col = 0; col < 8; col++)
            sad += abs(p[col] - q[col]);
    }
    return sad;
}
1561
/**
 * Sum of absolute differences over an 8x8 block, comparing pix1 against
 * avg2() of each sample of pix2 and its right-hand neighbour.
 * Reads 9 columns per row of pix2.
 * @param pix1 first block
 * @param pix2 second block
 * @param line_size distance in bytes between two rows, used for both blocks
 * @return the accumulated absolute difference
 */
static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    const uint8_t *cur = pix1;
    const uint8_t *ref = pix2;
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(cur[col] - avg2(ref[col], ref[col + 1]));
        cur += line_size;
        ref += line_size;
    }
    return sad;
}
1581
/**
 * Sum of absolute differences over an 8x8 block, comparing pix1 against
 * avg2() of each sample of pix2 and the sample directly below it.
 * Reads 9 rows starting at pix2.
 * @param pix1 first block
 * @param pix2 second block
 * @param line_size distance in bytes between two rows, used for both blocks
 * @return the accumulated absolute difference
 */
static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    const uint8_t *cur = pix1;
    const uint8_t *top = pix2;
    const uint8_t *bot = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(cur[col] - avg2(top[col], bot[col]));
        cur += line_size;
        top += line_size;
        bot += line_size;
    }
    return sad;
}
1603
/**
 * Sum of absolute differences over an 8x8 block, comparing pix1 against
 * avg4() of each 2x2 group of samples taken from pix2 and the row below it.
 * Reads 9 columns and 9 rows starting at pix2.
 * @param pix1 first block
 * @param pix2 second block
 * @param line_size distance in bytes between two rows, used for both blocks
 * @return the accumulated absolute difference
 */
static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    const uint8_t *cur = pix1;
    const uint8_t *top = pix2;
    const uint8_t *bot = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(cur[col] - avg4(top[col], top[col + 1], bot[col], bot[col + 1]));
        cur += line_size;
        top += line_size;
        bot += line_size;
    }
    return sad;
}
1625
1457ab52
MN
/**
 * Adapter with a leading context argument that forwards to
 * pix_abs16x16_c(); the context pointer s is not used.
 */
static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    const int result = pix_abs16x16_c(a, b, stride);

    return result;
}
1629
/**
 * Adapter with a leading context argument that forwards to
 * pix_abs8x8_c(); the context pointer s is not used.
 */
static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    const int result = pix_abs8x8_c(a, b, stride);

    return result;
}
1633
a9badb51
MN
1634/**
1635 * permutes an 8x8 block.
2a5700de 1636 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
1637 * @param permutation the permutation vector
1638 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
2a5700de
MN
1639 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
1640 * (inverse) permutated to scantable order!
a9badb51 1641 */
0c1a9eda 1642void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 1643{
7801d21d 1644 int i;
477ab036 1645 DCTELEM temp[64];
7801d21d
MN
1646
1647 if(last<=0) return;
9a7b310d 1648 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 1649
7801d21d
MN
1650 for(i=0; i<=last; i++){
1651 const int j= scantable[i];
1652 temp[j]= block[j];
1653 block[j]=0;
1654 }
1655
1656 for(i=0; i<=last; i++){
1657 const int j= scantable[i];
1658 const int perm_j= permutation[j];
1659 block[perm_j]= temp[j];
1660 }
d962f6fd 1661}
e0eac44e 1662
2a5700de
MN
1663/**
1664 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
1665 */
eb4b3dd3 1666static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
1667{
1668 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1669}
1670
11f18faf
MN
/**
 * Adds src to dst byte by byte: dst[i] += src[i] for 0 <= i < w.
 * The sums wrap around modulo 256; nothing is done when w <= 0.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    int pos;

    for (pos = 0; pos < w; pos++)
        dst[pos] += src[pos];
}
1686
/**
 * Byte-wise difference: dst[i] = src1[i] - src2[i] for 0 <= i < w.
 * The differences wrap around modulo 256; nothing is done when w <= 0.
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
    int pos;

    for (pos = 0; pos < w; pos++)
        dst[pos] = src1[pos] - src2[pos];
}
1702
1457ab52
MN
/**
 * Butterfly with separate outputs: o1 = i1 + i2, o2 = i1 - i2.
 * Both inputs are read once into int temporaries before anything is
 * written, so an output may name the same object as an input. The macro
 * expands to a single statement and is safe in an unbraced if/else.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do{\
    const int butterfly2_a_= (i1);\
    const int butterfly2_b_= (i2);\
    (o1)= butterfly2_a_+butterfly2_b_;\
    (o2)= butterfly2_a_-butterfly2_b_;\
}while(0)

/**
 * In-place butterfly: x becomes x + y and y becomes (old x) - y.
 * The temporaries use names unlikely to clash with the arguments, so
 * BUTTERFLY1(a, b) works as expected.
 */
#define BUTTERFLY1(x,y) \
do{\
    const int butterfly1_a_= (x);\
    const int butterfly1_b_= (y);\
    (x)= butterfly1_a_+butterfly1_b_;\
    (y)= butterfly1_a_-butterfly1_b_;\
}while(0)

/**
 * Last butterfly stage fused with the absolute-value sum: |x+y| + |x-y|.
 * Both arguments are evaluated more than once, so they must not have
 * side effects.
 */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
1717
/**
 * Runs the 8-point butterfly network over the 8x8 difference src - dst,
 * first along every row and then along every column, and returns the sum
 * of the absolute values of the 64 results.
 * @param s context pointer, not used here
 * @param dst block subtracted from src
 * @param src block the difference is taken from
 * @param stride distance in bytes between two rows, used for both blocks
 */
static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
    int temp[64];
    int sum=0;
    int row, col;

    /* horizontal pass: difference and first stage fused, then two more stages */
    for(row=0; row<8; row++){
        int * const t= temp + 8*row;
        const uint8_t * const srow= src + stride*row;
        const uint8_t * const drow= dst + stride*row;

        BUTTERFLY2(t[0], t[1], srow[0]-drow[0], srow[1]-drow[1]);
        BUTTERFLY2(t[2], t[3], srow[2]-drow[2], srow[3]-drow[3]);
        BUTTERFLY2(t[4], t[5], srow[4]-drow[4], srow[5]-drow[5]);
        BUTTERFLY2(t[6], t[7], srow[6]-drow[6], srow[7]-drow[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* vertical pass: two stages in place, the last one folded into the sum */
    for(col=0; col<8; col++){
        int * const t= temp + col;

        BUTTERFLY1(t[8*0], t[8*1]);
        BUTTERFLY1(t[8*2], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*5]);
        BUTTERFLY1(t[8*6], t[8*7]);

        BUTTERFLY1(t[8*0], t[8*2]);
        BUTTERFLY1(t[8*1], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*6]);
        BUTTERFLY1(t[8*5], t[8*7]);

        sum += BUTTERFLYA(t[8*0], t[8*4])
              +BUTTERFLYA(t[8*1], t[8*5])
              +BUTTERFLYA(t[8*2], t[8*6])
              +BUTTERFLYA(t[8*3], t[8*7]);
    }

    return sum;
}
1767
/**
 * Runs the 8-point butterfly network over the 8x8 block src after
 * subtracting mean from every sample, first along every row and then along
 * every column, and returns the sum of the absolute values of the 64 results.
 * FIXME (from the original author): ignoring the 0 term would be cleaner
 * than subtracting a mean.
 * @param src source block
 * @param stride distance in bytes between two rows
 * @param mean value subtracted from each sample before the transform
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int temp[64];
    int sum=0;
    int row, col;

    /* horizontal pass: mean removal and first stage fused, then two more stages */
    for(row=0; row<8; row++){
        int * const t= temp + 8*row;
        const uint8_t * const srow= src + stride*row;

        BUTTERFLY2(t[0], t[1], srow[0]-mean, srow[1]-mean);
        BUTTERFLY2(t[2], t[3], srow[2]-mean, srow[3]-mean);
        BUTTERFLY2(t[4], t[5], srow[4]-mean, srow[5]-mean);
        BUTTERFLY2(t[6], t[7], srow[6]-mean, srow[7]-mean);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* vertical pass: two stages in place, the last one folded into the sum */
    for(col=0; col<8; col++){
        int * const t= temp + col;

        BUTTERFLY1(t[8*0], t[8*1]);
        BUTTERFLY1(t[8*2], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*5]);
        BUTTERFLY1(t[8*6], t[8*7]);

        BUTTERFLY1(t[8*0], t[8*2]);
        BUTTERFLY1(t[8*1], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*6]);
        BUTTERFLY1(t[8*5], t[8*7]);

        sum += BUTTERFLYA(t[8*0], t[8*4])
              +BUTTERFLYA(t[8*1], t[8*5])
              +BUTTERFLYA(t[8*2], t[8*6])
              +BUTTERFLYA(t[8*3], t[8*7]);
    }

    return sum;
}
1811
1812static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1813 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
1814 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
1815 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1457ab52
MN
1816 int sum=0, i;
1817
1818 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 1819 s->dsp.fdct(temp);
1457ab52
MN
1820
1821 for(i=0; i<64; i++)
1822 sum+= ABS(temp[i]);
1823
1824 return sum;
1825}
1826
0e15384d 1827void simple_idct(DCTELEM *block); //FIXME
1457ab52
MN
1828
1829static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1830 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
1831 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
1832 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1833 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
1457ab52
MN
1834 int sum=0, i;
1835
1836 s->mb_intra=0;
1837
1838 s->dsp.diff_pixels(temp, src1, src2, stride);
1839
1840 memcpy(bak, temp, 64*sizeof(DCTELEM));
1841
67725183 1842 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1457ab52
MN
1843 s->dct_unquantize(s, temp, 0, s->qscale);
1844 simple_idct(temp); //FIXME
1845
1846 for(i=0; i<64; i++)
1847 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
1848
1849 return sum;
1850}
1851
3a87ac94
MN
1852static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1853 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 1854 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
1855 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
1856 uint64_t __align8 aligned_bak[stride];
1857 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1858 uint8_t * const bak= (uint8_t*)aligned_bak;
3a87ac94
MN
1859 int i, last, run, bits, level, distoration, start_i;
1860 const int esc_length= s->ac_esc_length;
1861 uint8_t * length;
1862 uint8_t * last_length;
67725183
MN
1863
1864 for(i=0; i<8; i++){
1865 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
1866 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
1867 }
3a87ac94 1868
67725183
MN
1869 s->dsp.diff_pixels(temp, src1, src2, stride);
1870
1871 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1872
1873 bits=0;
3a87ac94
MN
1874
1875 if (s->mb_intra) {
67725183 1876 start_i = 1;
3a87ac94
MN
1877 length = s->intra_ac_vlc_length;
1878 last_length= s->intra_ac_vlc_last_length;
67725183 1879 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
1880 } else {
1881 start_i = 0;
1882 length = s->inter_ac_vlc_length;
1883 last_length= s->inter_ac_vlc_last_length;
1884 }
3a87ac94 1885
67725183 1886 if(last>=start_i){
3a87ac94
MN
1887 run=0;
1888 for(i=start_i; i<last; i++){
1889 int j= scantable[i];
1890 level= temp[j];
1891
1892 if(level){
1893 level+=64;
1894 if((level&(~127)) == 0){
1895 bits+= length[UNI_AC_ENC_INDEX(run, level)];
1896 }else
1897 bits+= esc_length;
1898 run=0;
1899 }else
1900 run++;
1901 }
1902 i= scantable[last];
1d0eab1d 1903
3a87ac94 1904 level= temp[i] + 64;
1d0eab1d
MN
1905
1906 assert(level - 64);
1907
3a87ac94
MN
1908 if((level&(~127)) == 0){
1909 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
1910 }else
1911 bits+= esc_length;
1912
67725183
MN
1913 }
1914
1915 if(last>=0){
3a87ac94
MN
1916 s->dct_unquantize(s, temp, 0, s->qscale);
1917 }
1918
b0368839 1919 s->dsp.idct_add(bak, stride, temp);
3a87ac94
MN
1920
1921 distoration= s->dsp.sse[1](NULL, bak, src1, stride);
1922
67725183 1923 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
1924}
1925
1926static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1927 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 1928 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
1929 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
1930 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3a87ac94
MN
1931 int i, last, run, bits, level, start_i;
1932 const int esc_length= s->ac_esc_length;
1933 uint8_t * length;
1934 uint8_t * last_length;
67725183
MN
1935
1936 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 1937
67725183
MN
1938 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1939
1940 bits=0;
3a87ac94
MN
1941
1942 if (s->mb_intra) {
67725183 1943 start_i = 1;
3a87ac94
MN
1944 length = s->intra_ac_vlc_length;
1945 last_length= s->intra_ac_vlc_last_length;
67725183 1946 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
1947 } else {
1948 start_i = 0;
1949 length = s->inter_ac_vlc_length;
1950 last_length= s->inter_ac_vlc_last_length;
1951 }
3a87ac94 1952
67725183 1953 if(last>=start_i){
3a87ac94
MN
1954 run=0;
1955 for(i=start_i; i<last; i++){
1956 int j= scantable[i];
1957 level= temp[j];
1958
1959 if(level){
1960 level+=64;
1961 if((level&(~127)) == 0){
1962 bits+= length[UNI_AC_ENC_INDEX(run, level)];
1963 }else
1964 bits+= esc_length;
1965 run=0;
1966 }else
1967 run++;
1968 }
1969 i= scantable[last];
67725183
MN
1970
1971 level= temp[i] + 64;
3a87ac94 1972
67725183 1973 assert(level - 64);
3a87ac94 1974
3a87ac94
MN
1975 if((level&(~127)) == 0){
1976 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
1977 }else
1978 bits+= esc_length;
1979 }
1980
1981 return bits;
1982}
1983
1984
1457ab52
MN
/* Instantiates the functions named in the second argument from the 8x8
 * comparison functions named in the first; the generated names are the ones
 * installed in the [0] slots of the comparison tables by dsputil_init().
 * NOTE(review): WARPER88_1616 is defined outside this part of the file;
 * presumably it builds a 16x16 wrapper around the 8x8 function -- confirm. */
1985WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
1986WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
1987WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
3a87ac94
MN
1988WARPER88_1616(rd8x8_c, rd16x16_c)
1989WARPER88_1616(bit8x8_c, bit16x16_c)
1457ab52 1990
b0368839
MN
1991/* XXX: those functions should be suppressed ASAP when all IDCTs are
1992 converted */
1993static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
1994{
1995 j_rev_dct (block);
1996 put_pixels_clamped_c(block, dest, line_size);
1997}
1998static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
1999{
2000 j_rev_dct (block);
2001 add_pixels_clamped_c(block, dest, line_size);
2002}
2003
2004void dsputil_init(DSPContext* c, AVCodecContext *avctx)
e0eac44e 2005{
5abd509a 2006 static int init_done = 0;
d2975f8d 2007 int i;
e0eac44e 2008
5abd509a
ZK
2009 if (!init_done) {
2010 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
2011 for(i=0;i<MAX_NEG_CROP;i++) {
2012 cropTbl[i] = 0;
2013 cropTbl[i + MAX_NEG_CROP + 256] = 255;
2014 }
de6d9b64 2015
5abd509a
ZK
2016 for(i=0;i<512;i++) {
2017 squareTbl[i] = (i - 256) * (i - 256);
2018 }
92ddb692
ZK
2019
2020 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2021
2022 init_done = 1;
de6d9b64
FB
2023 }
2024
b0368839
MN
2025#ifdef CONFIG_ENCODERS
2026 if(avctx->dct_algo==FF_DCT_FASTINT)
2027 c->fdct = fdct_ifast;
2028 else
2029 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
2030#endif //CONFIG_ENCODERS
2031
2032 if(avctx->idct_algo==FF_IDCT_INT){
2033 c->idct_put= ff_jref_idct_put;
2034 c->idct_add= ff_jref_idct_add;
2035 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2036 }else{ //accurate/default
2037 c->idct_put= simple_idct_put;
2038 c->idct_add= simple_idct_add;
2039 c->idct_permutation_type= FF_NO_IDCT_PERM;
2040 }
2041
eb4b3dd3
ZK
2042 c->get_pixels = get_pixels_c;
2043 c->diff_pixels = diff_pixels_c;
2044 c->put_pixels_clamped = put_pixels_clamped_c;
2045 c->add_pixels_clamped = add_pixels_clamped_c;
2046 c->gmc1 = gmc1_c;
2047 c->gmc = gmc_c;
2048 c->clear_blocks = clear_blocks_c;
2049 c->pix_sum = pix_sum_c;
2050 c->pix_norm1 = pix_norm1_c;
1457ab52
MN
2051 c->sse[0]= sse16_c;
2052 c->sse[1]= sse8_c;
eb4b3dd3 2053
45553457 2054 /* TODO [0] 16 [1] 8 */
eb4b3dd3
ZK
2055 c->pix_abs16x16 = pix_abs16x16_c;
2056 c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
2057 c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
2058 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
2059 c->pix_abs8x8 = pix_abs8x8_c;
2060 c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
2061 c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
2062 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
2063
45553457
ZK
2064#define dspfunc(PFX, IDX, NUM) \
2065 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
2066 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
2067 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
2068 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
2069
2070 dspfunc(put, 0, 16);
2071 dspfunc(put_no_rnd, 0, 16);
2072 dspfunc(put, 1, 8);
2073 dspfunc(put_no_rnd, 1, 8);
2074
2075 dspfunc(avg, 0, 16);
2076 dspfunc(avg_no_rnd, 0, 16);
2077 dspfunc(avg, 1, 8);
2078 dspfunc(avg_no_rnd, 1, 8);
2079#undef dspfunc
2080
2081#define dspfunc(PFX, IDX, NUM) \
2082 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2083 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2084 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2085 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2086 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2087 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2088 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2089 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2090 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2091 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2092 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2093 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2094 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2095 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2096 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2097 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2098
2099 dspfunc(put_qpel, 0, 16);
2100 dspfunc(put_no_rnd_qpel, 0, 16);
2101
2102 dspfunc(avg_qpel, 0, 16);
2103 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2104
2105 dspfunc(put_qpel, 1, 8);
2106 dspfunc(put_no_rnd_qpel, 1, 8);
2107
2108 dspfunc(avg_qpel, 1, 8);
2109 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2110#undef dspfunc
c9a2ebc4 2111
1457ab52
MN
2112 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
2113 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2114 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2115 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2116 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2117 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2118 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2119 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2120
2121 c->hadamard8_diff[0]= hadamard8_diff16_c;
2122 c->hadamard8_diff[1]= hadamard8_diff_c;
2123 c->hadamard8_abs = hadamard8_abs_c;
2124
2125 c->dct_sad[0]= dct_sad16x16_c;
2126 c->dct_sad[1]= dct_sad8x8_c;
2127
2128 c->sad[0]= sad16x16_c;
2129 c->sad[1]= sad8x8_c;
2130
2131 c->quant_psnr[0]= quant_psnr16x16_c;
2132 c->quant_psnr[1]= quant_psnr8x8_c;
3a87ac94
MN
2133
2134 c->rd[0]= rd16x16_c;
2135 c->rd[1]= rd8x8_c;
2136
2137 c->bit[0]= bit16x16_c;
2138 c->bit[1]= bit8x8_c;
2139
11f18faf
MN
2140 c->add_bytes= add_bytes_c;
2141 c->diff_bytes= diff_bytes_c;
2142
980fc7b8 2143#ifdef HAVE_MMX
b0368839 2144 dsputil_init_mmx(c, avctx);
de6d9b64 2145#endif
3d03c0a2 2146#ifdef ARCH_ARMV4L
b0368839 2147 dsputil_init_armv4l(c, avctx);
3d03c0a2 2148#endif
c34270f5 2149#ifdef HAVE_MLIB
b0368839 2150 dsputil_init_mlib(c, avctx);
c34270f5 2151#endif
1e98dffb 2152#ifdef ARCH_ALPHA
b0368839 2153 dsputil_init_alpha(c, avctx);
1e98dffb 2154#endif
59925ef2 2155#ifdef ARCH_POWERPC
b0368839 2156 dsputil_init_ppc(c, avctx);
a43bd1d7 2157#endif
d46aba26 2158#ifdef HAVE_MMI
b0368839 2159 dsputil_init_mmi(c, avctx);
d46aba26 2160#endif
43f1708f 2161
b0368839
MN
2162 switch(c->idct_permutation_type){
2163 case FF_NO_IDCT_PERM:
2164 for(i=0; i<64; i++)
2165 c->idct_permutation[i]= i;
2166 break;
2167 case FF_LIBMPEG2_IDCT_PERM:
2168 for(i=0; i<64; i++)
2169 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
2170 break;
2171 case FF_SIMPLE_IDCT_PERM:
2172 for(i=0; i<64; i++)
2173 c->idct_permutation[i]= simple_mmx_permutation[i];
2174 break;
2175 case FF_TRANSPOSE_IDCT_PERM:
2176 for(i=0; i<64; i++)
2177 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
2178 break;
2179 default:
2180 fprintf(stderr, "Internal error, IDCT permutation not set\n");
2181 }
57060b1e 2182}
b0368839 2183