fix identifier to fix compilation
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
ff4ec49e 3 * Copyright (c) 2000, 2001 Fabrice Bellard.
de6d9b64 4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
de6d9b64 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
de6d9b64 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
de6d9b64 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7ff037e9 18 *
59fe111e 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
de6d9b64 20 */
983e3246
MN
21
22/**
23 * @file dsputil.c
24 * DSP utils
25 */
26
de6d9b64
FB
27#include "avcodec.h"
28#include "dsputil.h"
1457ab52 29#include "mpegvideo.h"
b0368839 30#include "simple_idct.h"
45553457 31
5596c60c 32
0c1a9eda
ZK
/* Clamping lookup table. The clamped put/add routines in this file index it
 * through (cropTbl + MAX_NEG_CROP), so negative and >255 indices are legal.
 * NOTE(review): the contents are filled outside this excerpt - presumably
 * clamp-to-[0,255]; confirm against the init code. */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
/* Lookup table used through (squareTbl + 256) by the SSE / norm routines in
 * this file, indexed with pixel values and pixel differences (-255..255).
 * NOTE(review): presumably holds squares of the index; filled elsewhere. */
uint32_t squareTbl[512];
de6d9b64 35
/**
 * Zigzag scan order of an 8x8 block.
 * Entry i is the raster position (row * 8 + column) of the i-th
 * coefficient visited by the scan; the table is a permutation of 0..63.
 */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
46
/* Non-permuted inverse of zigzag_direct, plus 1, for the MMX quantizer.
 * NOTE(review): the table is filled outside this excerpt. */
uint16_t __align8 inv_zigzag_direct16[64];
2f349de2 49
/**
 * Alternate (horizontal) coefficient scan order of an 8x8 block.
 * Entry i is the raster position of the i-th scanned coefficient;
 * the table is a permutation of 0..63.
 */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
60
/**
 * Alternate (vertical) coefficient scan order of an 8x8 block.
 * Entry i is the raster position of the i-th scanned coefficient;
 * the table is a permutation of 0..63.
 */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
71
/**
 * Fixed-point reciprocal table: division by a small constant becomes a
 * multiply and a shift.
 *
 * (a * inverse[b]) >> 32 == a / b   for all 0 <= a <= 65536 and 2 <= b <= 255,
 * provided the product is evaluated in 64 bits.
 * Entries 0 and 1 are outside the guaranteed range.
 */
const uint32_t inverse[256]={
             0, 4294967295U, 2147483648U,  1431655766,  1073741824,   858993460,   715827883,   613566757,
     536870912,   477218589,   429496730,   390451573,   357913942,   330382100,   306783379,   286331154,
     268435456,   252645136,   238609295,   226050911,   214748365,   204522253,   195225787,   186737709,
     178956971,   171798692,   165191050,   159072863,   153391690,   148102321,   143165577,   138547333,
     134217728,   130150525,   126322568,   122713352,   119304648,   116080198,   113025456,   110127367,
     107374183,   104755300,   102261127,    99882961,    97612894,    95443718,    93368855,    91382283,
      89478486,    87652394,    85899346,    84215046,    82595525,    81037119,    79536432,    78090315,
      76695845,    75350304,    74051161,    72796056,    71582789,    70409300,    69273667,    68174085,
      67108864,    66076420,    65075263,    64103990,    63161284,    62245903,    61356676,    60492498,
      59652324,    58835169,    58040099,    57266231,    56512728,    55778797,    55063684,    54366675,
      53687092,    53024288,    52377650,    51746594,    51130564,    50529028,    49941481,    49367441,
      48806447,    48258060,    47721859,    47197443,    46684428,    46182445,    45691142,    45210183,
      44739243,    44278014,    43826197,    43383509,    42949673,    42524429,    42107523,    41698712,
      41297763,    40904451,    40518560,    40139882,    39768216,    39403370,    39045158,    38693400,
      38347923,    38008561,    37675152,    37347542,    37025581,    36709123,    36398028,    36092163,
      35791395,    35495598,    35204650,    34918434,    34636834,    34359739,    34087043,    33818641,
      33554432,    33294321,    33038210,    32786010,    32537632,    32292988,    32051995,    31814573,
      31580642,    31350127,    31122952,    30899046,    30678338,    30460761,    30246249,    30034737,
      29826162,    29620465,    29417585,    29217465,    29020050,    28825284,    28633116,    28443493,
      28256364,    28071682,    27889399,    27709467,    27531842,    27356480,    27183338,    27012373,
      26843546,    26676816,    26512144,    26349493,    26188825,    26030105,    25873297,    25718368,
      25565282,    25414008,    25264514,    25116768,    24970741,    24826401,    24683721,    24542671,
      24403224,    24265352,    24129030,    23994231,    23860930,    23729102,    23598722,    23469767,
      23342214,    23216040,    23091223,    22967740,    22845571,    22724695,    22605092,    22486740,
      22369622,    22253717,    22139007,    22025474,    21913099,    21801865,    21691755,    21582751,
      21474837,    21367997,    21262215,    21157475,    21053762,    20951060,    20849356,    20748635,
      20648882,    20550083,    20452226,    20355296,    20259280,    20164166,    20069941,    19976593,
      19884108,    19792477,    19701685,    19611723,    19522579,    19434242,    19346700,    19259944,
      19173962,    19088744,    19004281,    18920561,    18837576,    18755316,    18673771,    18592933,
      18512791,    18433337,    18354562,    18276457,    18199014,    18122225,    18046082,    17970575,
      17895698,    17821442,    17747799,    17674763,    17602325,    17530479,    17459217,    17388532,
      17318417,    17248865,    17179870,    17111424,    17043522,    16976156,    16909321,    16843010,
};
107
b0368839
MN
/**
 * Coefficient input permutation expected by simple_idct_mmx.
 * The table is a permutation of 0..63.
 */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
119
/**
 * Sum of all samples of a 16x16 block.
 *
 * @param pix       pointer to the top-left sample of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
141
0c1a9eda 142static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
143{
144 int s, i, j;
0c1a9eda 145 uint32_t *sq = squareTbl + 256;
3aa102be
MN
146
147 s = 0;
148 for (i = 0; i < 16; i++) {
149 for (j = 0; j < 16; j += 8) {
2a006cd3 150#if 0
3aa102be
MN
151 s += sq[pix[0]];
152 s += sq[pix[1]];
153 s += sq[pix[2]];
154 s += sq[pix[3]];
155 s += sq[pix[4]];
156 s += sq[pix[5]];
157 s += sq[pix[6]];
158 s += sq[pix[7]];
2a006cd3
FL
159#else
160#if LONG_MAX > 2147483647
161 register uint64_t x=*(uint64_t*)pix;
162 s += sq[x&0xff];
163 s += sq[(x>>8)&0xff];
164 s += sq[(x>>16)&0xff];
165 s += sq[(x>>24)&0xff];
166 s += sq[(x>>32)&0xff];
167 s += sq[(x>>40)&0xff];
168 s += sq[(x>>48)&0xff];
169 s += sq[(x>>56)&0xff];
170#else
171 register uint32_t x=*(uint32_t*)pix;
172 s += sq[x&0xff];
173 s += sq[(x>>8)&0xff];
174 s += sq[(x>>16)&0xff];
175 s += sq[(x>>24)&0xff];
176 x=*(uint32_t*)(pix+4);
177 s += sq[x&0xff];
178 s += sq[(x>>8)&0xff];
179 s += sq[(x>>16)&0xff];
180 s += sq[(x>>24)&0xff];
181#endif
182#endif
3aa102be
MN
183 pix += 8;
184 }
185 pix += line_size - 16;
186 }
187 return s;
188}
189
3d2e8cce
MN
/**
 * Apply bswap_32() to w 32-bit words, reading src and writing dst in
 * ascending index order.
 *
 * @param dst destination buffer, at least w words
 * @param src source buffer, at least w words
 * @param w   number of 32-bit words to process
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n;

    for(n=0; n<w; n++)
        dst[n]= bswap_32(src[n]);
}
3aa102be 207
0c1a9eda 208static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
1457ab52
MN
209{
210 int s, i;
0c1a9eda 211 uint32_t *sq = squareTbl + 256;
1457ab52
MN
212
213 s = 0;
214 for (i = 0; i < 8; i++) {
215 s += sq[pix1[0] - pix2[0]];
216 s += sq[pix1[1] - pix2[1]];
217 s += sq[pix1[2] - pix2[2]];
218 s += sq[pix1[3] - pix2[3]];
219 s += sq[pix1[4] - pix2[4]];
220 s += sq[pix1[5] - pix2[5]];
221 s += sq[pix1[6] - pix2[6]];
222 s += sq[pix1[7] - pix2[7]];
223 pix1 += line_size;
224 pix2 += line_size;
225 }
226 return s;
227}
228
6b026927 229static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
9c76bd48 230{
6b026927
FH
231 int s, i;
232 uint32_t *sq = squareTbl + 256;
9c76bd48
BF
233
234 s = 0;
235 for (i = 0; i < 16; i++) {
6b026927
FH
236 s += sq[pix1[ 0] - pix2[ 0]];
237 s += sq[pix1[ 1] - pix2[ 1]];
238 s += sq[pix1[ 2] - pix2[ 2]];
239 s += sq[pix1[ 3] - pix2[ 3]];
240 s += sq[pix1[ 4] - pix2[ 4]];
241 s += sq[pix1[ 5] - pix2[ 5]];
242 s += sq[pix1[ 6] - pix2[ 6]];
243 s += sq[pix1[ 7] - pix2[ 7]];
244 s += sq[pix1[ 8] - pix2[ 8]];
245 s += sq[pix1[ 9] - pix2[ 9]];
246 s += sq[pix1[10] - pix2[10]];
247 s += sq[pix1[11] - pix2[11]];
248 s += sq[pix1[12] - pix2[12]];
249 s += sq[pix1[13] - pix2[13]];
250 s += sq[pix1[14] - pix2[14]];
251 s += sq[pix1[15] - pix2[15]];
2a006cd3 252
6b026927
FH
253 pix1 += line_size;
254 pix2 += line_size;
9c76bd48
BF
255 }
256 return s;
257}
258
0c1a9eda 259static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 260{
de6d9b64
FB
261 int i;
262
263 /* read the pixels */
de6d9b64 264 for(i=0;i<8;i++) {
c13e1abd
FH
265 block[0] = pixels[0];
266 block[1] = pixels[1];
267 block[2] = pixels[2];
268 block[3] = pixels[3];
269 block[4] = pixels[4];
270 block[5] = pixels[5];
271 block[6] = pixels[6];
272 block[7] = pixels[7];
273 pixels += line_size;
274 block += 8;
de6d9b64
FB
275 }
276}
277
0c1a9eda
ZK
278static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
279 const uint8_t *s2, int stride){
9dbcbd92
MN
280 int i;
281
282 /* read the pixels */
9dbcbd92 283 for(i=0;i<8;i++) {
c13e1abd
FH
284 block[0] = s1[0] - s2[0];
285 block[1] = s1[1] - s2[1];
286 block[2] = s1[2] - s2[2];
287 block[3] = s1[3] - s2[3];
288 block[4] = s1[4] - s2[4];
289 block[5] = s1[5] - s2[5];
290 block[6] = s1[6] - s2[6];
291 block[7] = s1[7] - s2[7];
9dbcbd92
MN
292 s1 += stride;
293 s2 += stride;
c13e1abd 294 block += 8;
9dbcbd92
MN
295 }
296}
297
298
0c1a9eda 299static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
eb4b3dd3 300 int line_size)
de6d9b64 301{
de6d9b64 302 int i;
0c1a9eda 303 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
304
305 /* read the pixels */
de6d9b64 306 for(i=0;i<8;i++) {
c13e1abd
FH
307 pixels[0] = cm[block[0]];
308 pixels[1] = cm[block[1]];
309 pixels[2] = cm[block[2]];
310 pixels[3] = cm[block[3]];
311 pixels[4] = cm[block[4]];
312 pixels[5] = cm[block[5]];
313 pixels[6] = cm[block[6]];
314 pixels[7] = cm[block[7]];
315
316 pixels += line_size;
317 block += 8;
de6d9b64
FB
318 }
319}
320
0c1a9eda 321static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 322 int line_size)
de6d9b64 323{
de6d9b64 324 int i;
0c1a9eda 325 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
326
327 /* read the pixels */
de6d9b64 328 for(i=0;i<8;i++) {
c13e1abd
FH
329 pixels[0] = cm[pixels[0] + block[0]];
330 pixels[1] = cm[pixels[1] + block[1]];
331 pixels[2] = cm[pixels[2] + block[2]];
332 pixels[3] = cm[pixels[3] + block[3]];
333 pixels[4] = cm[pixels[4] + block[4]];
334 pixels[5] = cm[pixels[5] + block[5]];
335 pixels[6] = cm[pixels[6] + block[6]];
336 pixels[7] = cm[pixels[7] + block[7]];
337 pixels += line_size;
338 block += 8;
de6d9b64
FB
339 }
340}
59fe111e
MN
/**
 * PIXOP2(OPNAME, OP) expands to the C block copy / interpolation kernels
 * OPNAME_pixels{2,4,8,16}*, working on 16/32-bit words.
 *
 * OP(dst, value) is the store operation (see op_put / op_avg).
 * LD16, LD32, rnd_avg32, no_rnd_avg32 and CALL_2X_PIXELS are defined outside
 * this excerpt. NOTE(review): presumably LD16/LD32 are unaligned-safe loads,
 * rnd_avg32/no_rnd_avg32 are per-byte averages with/without rounding up,
 * and CALL_2X_PIXELS builds a 16-wide kernel from two 8-wide calls - confirm.
 *
 * Kernel families:
 *  - _pixelsN_c       straight copy of N-byte rows
 *  - _pixelsN_l2      OP of avg32(src1, src2), each source with its own stride
 *  - _pixelsN_x2/_y2  _l2 applied to (pixels, pixels+1) / (pixels, pixels+line_size)
 *  - _pixels8_l4      per-byte (a+b+c+d+2)>>2 of four sources (+1 for no_rnd)
 *  - _pixelsN_xy2     the same 4-tap mean over a 2x2 neighbourhood
 *
 * The 4-tap means split every byte into its low 2 bits (l*) and its high
 * 6 bits pre-shifted by 2 (h*), so four bytes can be summed inside one
 * 32-bit word without carries crossing byte boundaries.
 *
 * The disabled 64-bit-word variant that used to sit in an "#if 0" branch
 * here was dead code and has been removed.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), LD16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD16(&src1[i*src_stride1  ]);\
        b= LD16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* rounded per-byte mean of four sources: (a+b+c+d+2)>>2 */\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= LD32(&src1[i*src_stride1]);\
        b= LD32(&src2[i*src_stride2]);\
        c= LD32(&src3[i*src_stride3]);\
        d= LD32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        c= LD32(&src3[i*src_stride3+4]);\
        d= LD32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* as _pixels8_l4 but with rounding constant 1 instead of 2 */\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= LD32(&src1[i*src_stride1]);\
        b= LD32(&src2[i*src_stride2]);\
        c= LD32(&src3[i*src_stride3]);\
        d= LD32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        c= LD32(&src3[i*src_stride3+4]);\
        d= LD32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
/* 2-wide 2x2 mean; writes two rows per iteration. Note that OP is not */\
/* applied here: the result is always stored directly (see FIXME).     */\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
/* 4-wide 2x2 mean; l0/h0 carry the previous row (with the rounding term), */\
/* l1/h1 the current one, so each source row is loaded only once.          */\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
/* 8-wide 2x2 mean, done as two 4-byte columns (j); after each column the */\
/* pointers are rewound to the top and moved 4 bytes to the right.        */\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* as _pixels8_xy2_c but with rounding constant 1 instead of 2 */\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)

/* "avg" flavour of OP: combine the destination word with the new value
 * through rnd_avg32() and store the result back. */
#define op_avg(a, b) a = rnd_avg32(a, b)
59fe111e
MN
/* "put" flavour of OP: plain store of the computed value. */
#define op_put(a, b) a = b

/* Instantiate the avg_* and put_* kernel families from PIXOP2, then drop
 * the helper store operations so they do not leak into the rest of the file. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
de6d9b64
FB
/* Rounded integer mean of two / four values.
 * Every argument is parenthesized so that operands containing operators of
 * lower precedence than '+' (e.g. `x|y`, `c ? p : q`) are averaged as a
 * whole. Each argument is still evaluated exactly once. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
862
073b013d 863
/**
 * Bilinear interpolation of an 8-pixel-wide block at a constant
 * 1/16-sample offset.
 *
 * Each output sample is a weighted sum of the 2x2 source neighbourhood;
 * the four weights sum to 256, hence the final >>8.
 *
 * @param dst     destination, 8 samples per row, h rows
 * @param src     source; 9 samples per row and h+1 rows are read
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal offset in 1/16 sample units
 * @param y16     vertical offset in 1/16 sample units
 * @param rounder value added before the >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);   /* top-left     */
    const int w_tr =        x16 * (16 - y16);   /* top-right    */
    const int w_bl = (16 - x16) *        y16;   /* bottom-left  */
    const int w_br =        x16 *        y16;   /* bottom-right */
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1]
                      + w_bl * src[stride + col] + w_br * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
886
/**
 * Warp an 8-pixel-wide strip using a per-pixel source position.
 *
 * The source position starts at (ox, oy), advances by (dxx, dyx) for every
 * output column and by (dxy, dyy) for every output row. After dropping the
 * low 16 bits, the remaining value carries `shift` fractional bits that are
 * used as bilinear weights. Positions that fall outside
 * [0, width-1) x [0, height-1) are clamped with clip() and interpolated only
 * along the axis that is still inside (or not at all if both are outside).
 *
 * @param dst    destination, 8 pixels written per row
 * @param src    source picture
 * @param stride distance in bytes between rows, used for both dst and src
 * @param h      number of rows to produce
 * @param r      constant added before the final >>(2*shift)
 * @param width  source width; the last usable column index is width-1
 * @param height source height; the last usable row index is height-1
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s         = 1 << shift;
    const int frac_mask = s - 1;
    const int max_x     = width  - 1;
    const int max_y     = height - 1;
    int row;

    for (row = 0; row < h; row++) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) { //XXX FIXME optimize
            const int fixed_x  = vx >> 16;
            const int fixed_y  = vy >> 16;
            const int fx       = fixed_x & frac_mask;
            const int fy       = fixed_y & frac_mask;
            const int px       = fixed_x >> shift;
            const int py       = fixed_y >> shift;
            /* the unsigned compare rejects negative coordinates as well */
            const int x_inside = (unsigned)px < (unsigned)max_x;
            const int y_inside = (unsigned)py < (unsigned)max_y;

            if (x_inside && y_inside) {
                const uint8_t *p = src + px + py * stride;
                out[col] = (  (p[0]      * (s - fx) + p[1]          * fx) * (s - fy)
                            + (p[stride] * (s - fx) + p[stride + 1] * fx) * fy
                            + r) >> (shift * 2);
            } else if (x_inside) {
                /* row clamped: interpolate horizontally only */
                const uint8_t *p = src + px + clip(py, 0, max_y) * stride;
                out[col] = ((p[0] * (s - fx) + p[1] * fx) * s + r) >> (shift * 2);
            } else if (y_inside) {
                /* column clamped: interpolate vertically only */
                const uint8_t *p = src + clip(px, 0, max_x) + py * stride;
                out[col] = ((p[0] * (s - fy) + p[stride] * fy) * s + r) >> (shift * 2);
            } else {
                out[col] = src[clip(px, 0, max_x) + clip(py, 0, max_y) * stride];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
944
/**
 * Full-pel copy: forwards to the put_pixelsN_c routine that matches `width`.
 * Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
953
/**
 * Horizontal interpolation with weights 2:1 (current pixel : right neighbour).
 * 683/2048 approximates 1/3, so the result is about (2*a + b + 1) / 3.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + src[col+1];
            dst[col] = (683*(weighted + 1)) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
964
/**
 * Horizontal interpolation with weights 1:2 (current pixel : right neighbour).
 * 683/2048 approximates 1/3, so the result is about (a + 2*b + 1) / 3.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2*src[col+1];
            dst[col] = (683*(weighted + 1)) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
975
/**
 * Vertical interpolation with weights 2:1 (current pixel : pixel below).
 * 683/2048 approximates 1/3, so the result is about (2*a + c + 1) / 3.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + src[col+stride];
            dst[col] = (683*(weighted + 1)) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
986
/**
 * Two-dimensional interpolation with weights
 *   4 3
 *   3 2
 * over the 2x2 neighbourhood (current, right / below, below-right).
 * 2731/32768 approximates 1/12, the sum of the weights.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int upper = 4*src[col]        + 3*src[col+1];
            const int lower = 3*src[col+stride] + 2*src[col+stride+1];
            dst[col] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
997
/**
 * Two-dimensional interpolation with weights
 *   3 4
 *   2 3
 * over the 2x2 neighbourhood (current, right / below, below-right).
 * 2731/32768 approximates 1/12, the sum of the weights.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int upper = 3*src[col]        + 4*src[col+1];
            const int lower = 2*src[col+stride] + 3*src[col+stride+1];
            dst[col] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
1008
/**
 * Vertical interpolation with weights 1:2 (current pixel : pixel below).
 * 683/2048 approximates 1/3, so the result is about (a + 2*c + 1) / 3.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2*src[col+stride];
            dst[col] = (683*(weighted + 1)) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
1019
/**
 * Two-dimensional interpolation with weights
 *   3 2
 *   4 3
 * over the 2x2 neighbourhood (current, right / below, below-right).
 * 2731/32768 approximates 1/12, the sum of the weights.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int upper = 3*src[col]        + 2*src[col+1];
            const int lower = 4*src[col+stride] + 3*src[col+stride+1];
            dst[col] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
1030
/**
 * Two-dimensional interpolation with weights
 *   2 3
 *   3 4
 * over the 2x2 neighbourhood (current, right / below, below-right).
 * 2731/32768 approximates 1/12, the sum of the weights.
 * `stride` is the row pitch of both dst and src.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < width; col++) {
            const int upper = 2*src[col]        + 3*src[col+1];
            const int lower = 3*src[col+stride] + 4*src[col+stride+1];
            dst[col] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
#if 0
/* Disabled generator for fixed-width wrappers around the put_tpel_pixels_mcXY_c
 * helpers: TPEL_WIDTH(n) would define put_tpel_pixels<n>_mcXY_c(dst, src,
 * stride, height), each forwarding to the generic helper with width = n.
 * The wrapper bodies are plain calls (a leading "void" before the callee made
 * them invalid C), so the macro is usable if this section is ever enabled. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1062
0da71265
MN
/**
 * Defines OPNAME##h264_chroma_mc<W>_c(dst, src, stride, h, x, y): a bilinear
 * blend of a W-pixel-wide, h-row strip.
 *
 * x and y must lie in 0..7 (checked with assert). The four weights
 * (8-x)(8-y), x(8-y), (8-x)y and xy add up to 64; the raw weighted sum is
 * handed to OP together with the destination pixel, and OP is responsible for
 * rounding, scaling and storing. `stride` is the row pitch of both buffers.
 */
#define H264_CHROMA_MC_FUNC(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(row=0; row<h; row++){\
        for(col=0; col<W; col++){\
            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
        }\
        dst+= stride;\
        src+= stride;\
    }\
}

/* Emits the 2-, 4- and 8-pixel-wide variants for one store operation. */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 2)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 4)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 8)

/* put: round the 6-bit-scaled sum and store it.
 * avg: same value, then averaged (rounding up) with what dst already holds. */
#define op_put(a, b) a = (((b) + 32)>>6)
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1133
/**
 * Copy a 4-byte-wide block of h rows, one 32-bit load/store per row.
 * dst and src advance by their own strides after each row.
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        ST32(dst, LD32(src));
        src += srcStride;
        dst += dstStride;
    }
}
1144
/**
 * Copy an 8-byte-wide block of h rows as two 32-bit words per row.
 * dst and src advance by their own strides after each row.
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
        src += srcStride;
        dst += dstStride;
    }
}
1156
/**
 * Copy a 16-byte-wide block of h rows as four 32-bit words per row.
 * dst and src advance by their own strides after each row.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
        src += srcStride;
        dst += dstStride;
    }
}
073b013d 1170
/**
 * Copy a 17-byte-wide block of h rows: four 32-bit words followed by one
 * trailing byte per row. dst and src advance by their own strides.
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[16] = src[16];   /* the 17th column */
        src += srcStride;
        dst += dstStride;
    }
}
1185
/**
 * Copy a 9-byte-wide block of h rows: two 32-bit words followed by one
 * trailing byte per row. dst and src advance by their own strides.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[8] = src[8];     /* the 9th column */
        src += srcStride;
        dst += dstStride;
    }
}
1198
826f429a 1199
b3184779 1200#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda
ZK
1201static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1202 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1203 int i;\
1204 for(i=0; i<h; i++)\
1205 {\
1206 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1207 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1208 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1209 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1210 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1211 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1212 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1213 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1214 dst+=dstStride;\
1215 src+=srcStride;\
1216 }\
44eb4951
MN
1217}\
1218\
0c1a9eda 1219static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 1220 const int w=8;\
0c1a9eda 1221 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1222 int i;\
1223 for(i=0; i<w; i++)\
1224 {\
1225 const int src0= src[0*srcStride];\
1226 const int src1= src[1*srcStride];\
1227 const int src2= src[2*srcStride];\
1228 const int src3= src[3*srcStride];\
1229 const int src4= src[4*srcStride];\
1230 const int src5= src[5*srcStride];\
1231 const int src6= src[6*srcStride];\
1232 const int src7= src[7*srcStride];\
1233 const int src8= src[8*srcStride];\
1234 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1235 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1236 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1237 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1238 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1239 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1240 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1241 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1242 dst++;\
1243 src++;\
1244 }\
1245}\
1246\
0c1a9eda
ZK
1247static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1248 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1249 int i;\
826f429a 1250 \
b3184779
MN
1251 for(i=0; i<h; i++)\
1252 {\
1253 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1254 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1255 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1256 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1257 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1258 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1259 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1260 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1261 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1262 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1263 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1264 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1265 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1266 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1267 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1268 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1269 dst+=dstStride;\
1270 src+=srcStride;\
1271 }\
1272}\
1273\
0c1a9eda
ZK
1274static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1275 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1276 int i;\
826f429a 1277 const int w=16;\
b3184779
MN
1278 for(i=0; i<w; i++)\
1279 {\
1280 const int src0= src[0*srcStride];\
1281 const int src1= src[1*srcStride];\
1282 const int src2= src[2*srcStride];\
1283 const int src3= src[3*srcStride];\
1284 const int src4= src[4*srcStride];\
1285 const int src5= src[5*srcStride];\
1286 const int src6= src[6*srcStride];\
1287 const int src7= src[7*srcStride];\
1288 const int src8= src[8*srcStride];\
1289 const int src9= src[9*srcStride];\
1290 const int src10= src[10*srcStride];\
1291 const int src11= src[11*srcStride];\
1292 const int src12= src[12*srcStride];\
1293 const int src13= src[13*srcStride];\
1294 const int src14= src[14*srcStride];\
1295 const int src15= src[15*srcStride];\
1296 const int src16= src[16*srcStride];\
1297 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1298 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1299 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1300 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1301 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1302 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1303 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1304 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1305 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1306 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1307 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1308 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1309 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1310 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1311 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1312 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1313 dst++;\
1314 src++;\
1315 }\
1316}\
1317\
0c1a9eda 1318static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1319 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
1320}\
1321\
0c1a9eda
ZK
1322static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1323 uint8_t half[64];\
b3184779
MN
1324 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1325 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
1326}\
1327\
0c1a9eda 1328static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1329 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
1330}\
1331\
0c1a9eda
ZK
1332static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1333 uint8_t half[64];\
b3184779
MN
1334 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1335 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
1336}\
1337\
0c1a9eda
ZK
1338static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1339 uint8_t full[16*9];\
1340 uint8_t half[64];\
b3184779 1341 copy_block9(full, src, 16, stride, 9);\
db794953 1342 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1343 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
1344}\
1345\
0c1a9eda
ZK
1346static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1347 uint8_t full[16*9];\
b3184779 1348 copy_block9(full, src, 16, stride, 9);\
db794953 1349 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
1350}\
1351\
0c1a9eda
ZK
1352static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1353 uint8_t full[16*9];\
1354 uint8_t half[64];\
b3184779 1355 copy_block9(full, src, 16, stride, 9);\
db794953 1356 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1357 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 1358}\
0c1a9eda
ZK
1359void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1360 uint8_t full[16*9];\
1361 uint8_t halfH[72];\
1362 uint8_t halfV[64];\
1363 uint8_t halfHV[64];\
b3184779
MN
1364 copy_block9(full, src, 16, stride, 9);\
1365 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1366 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1367 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1368 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1369}\
0c1a9eda
ZK
1370static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1371 uint8_t full[16*9];\
1372 uint8_t halfH[72];\
1373 uint8_t halfHV[64];\
db794953
MN
1374 copy_block9(full, src, 16, stride, 9);\
1375 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1376 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1377 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1378 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1379}\
0c1a9eda
ZK
1380void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1381 uint8_t full[16*9];\
1382 uint8_t halfH[72];\
1383 uint8_t halfV[64];\
1384 uint8_t halfHV[64];\
b3184779
MN
1385 copy_block9(full, src, 16, stride, 9);\
1386 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1387 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1388 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1389 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1390}\
0c1a9eda
ZK
1391static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1392 uint8_t full[16*9];\
1393 uint8_t halfH[72];\
1394 uint8_t halfHV[64];\
db794953
MN
1395 copy_block9(full, src, 16, stride, 9);\
1396 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1397 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1398 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1399 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1400}\
0c1a9eda
ZK
1401void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1402 uint8_t full[16*9];\
1403 uint8_t halfH[72];\
1404 uint8_t halfV[64];\
1405 uint8_t halfHV[64];\
b3184779
MN
1406 copy_block9(full, src, 16, stride, 9);\
1407 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1408 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1409 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1410 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1411}\
0c1a9eda
ZK
1412static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1413 uint8_t full[16*9];\
1414 uint8_t halfH[72];\
1415 uint8_t halfHV[64];\
db794953
MN
1416 copy_block9(full, src, 16, stride, 9);\
1417 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1418 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1419 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1420 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1421}\
0c1a9eda
ZK
1422void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1423 uint8_t full[16*9];\
1424 uint8_t halfH[72];\
1425 uint8_t halfV[64];\
1426 uint8_t halfHV[64];\
b3184779
MN
1427 copy_block9(full, src, 16, stride, 9);\
1428 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1429 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1430 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1431 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1432}\
0c1a9eda
ZK
1433static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1434 uint8_t full[16*9];\
1435 uint8_t halfH[72];\
1436 uint8_t halfHV[64];\
db794953
MN
1437 copy_block9(full, src, 16, stride, 9);\
1438 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1439 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1440 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1441 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1442}\
0c1a9eda
ZK
1443static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1444 uint8_t halfH[72];\
1445 uint8_t halfHV[64];\
b3184779 1446 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1447 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1448 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1449}\
0c1a9eda
ZK
1450static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1451 uint8_t halfH[72];\
1452 uint8_t halfHV[64];\
b3184779 1453 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1454 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1455 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1456}\
0c1a9eda
ZK
1457void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1458 uint8_t full[16*9];\
1459 uint8_t halfH[72];\
1460 uint8_t halfV[64];\
1461 uint8_t halfHV[64];\
b3184779
MN
1462 copy_block9(full, src, 16, stride, 9);\
1463 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1464 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1465 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1466 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1467}\
0c1a9eda
ZK
1468static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1469 uint8_t full[16*9];\
1470 uint8_t halfH[72];\
db794953
MN
1471 copy_block9(full, src, 16, stride, 9);\
1472 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1473 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1474 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1475}\
0c1a9eda
ZK
1476void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1477 uint8_t full[16*9];\
1478 uint8_t halfH[72];\
1479 uint8_t halfV[64];\
1480 uint8_t halfHV[64];\
b3184779
MN
1481 copy_block9(full, src, 16, stride, 9);\
1482 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1483 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1484 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1485 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1486}\
0c1a9eda
ZK
1487static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1488 uint8_t full[16*9];\
1489 uint8_t halfH[72];\
db794953
MN
1490 copy_block9(full, src, 16, stride, 9);\
1491 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1492 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1493 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1494}\
0c1a9eda
ZK
1495static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1496 uint8_t halfH[72];\
b3184779 1497 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1498 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1499}\
0c1a9eda 1500static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1501 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1502}\
1503\
0c1a9eda
ZK
1504static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1505 uint8_t half[256];\
b3184779
MN
1506 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1507 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1508}\
1509\
0c1a9eda 1510static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1511 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1512}\
b3184779 1513\
0c1a9eda
ZK
1514static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1515 uint8_t half[256];\
b3184779
MN
1516 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1517 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1518}\
1519\
0c1a9eda
ZK
1520static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1521 uint8_t full[24*17];\
1522 uint8_t half[256];\
b3184779 1523 copy_block17(full, src, 24, stride, 17);\
826f429a 1524 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1525 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1526}\
1527\
0c1a9eda
ZK
1528static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1529 uint8_t full[24*17];\
b3184779 1530 copy_block17(full, src, 24, stride, 17);\
826f429a 1531 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1532}\
1533\
0c1a9eda
ZK
1534static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1535 uint8_t full[24*17];\
1536 uint8_t half[256];\
b3184779 1537 copy_block17(full, src, 24, stride, 17);\
826f429a 1538 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1539 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1540}\
0c1a9eda
ZK
1541void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1542 uint8_t full[24*17];\
1543 uint8_t halfH[272];\
1544 uint8_t halfV[256];\
1545 uint8_t halfHV[256];\
b3184779
MN
1546 copy_block17(full, src, 24, stride, 17);\
1547 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1548 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1549 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1550 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1551}\
0c1a9eda
ZK
1552static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1553 uint8_t full[24*17];\
1554 uint8_t halfH[272];\
1555 uint8_t halfHV[256];\
db794953
MN
1556 copy_block17(full, src, 24, stride, 17);\
1557 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1558 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1559 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1560 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1561}\
0c1a9eda
ZK
1562void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1563 uint8_t full[24*17];\
1564 uint8_t halfH[272];\
1565 uint8_t halfV[256];\
1566 uint8_t halfHV[256];\
b3184779
MN
1567 copy_block17(full, src, 24, stride, 17);\
1568 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1569 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1570 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1571 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1572}\
0c1a9eda
ZK
1573static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1574 uint8_t full[24*17];\
1575 uint8_t halfH[272];\
1576 uint8_t halfHV[256];\
db794953
MN
1577 copy_block17(full, src, 24, stride, 17);\
1578 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1579 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1580 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1581 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1582}\
0c1a9eda
ZK
1583void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1584 uint8_t full[24*17];\
1585 uint8_t halfH[272];\
1586 uint8_t halfV[256];\
1587 uint8_t halfHV[256];\
b3184779
MN
1588 copy_block17(full, src, 24, stride, 17);\
1589 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1590 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1591 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1592 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1593}\
0c1a9eda
ZK
1594static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1595 uint8_t full[24*17];\
1596 uint8_t halfH[272];\
1597 uint8_t halfHV[256];\
db794953
MN
1598 copy_block17(full, src, 24, stride, 17);\
1599 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1600 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1601 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1602 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1603}\
0c1a9eda
ZK
1604void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1605 uint8_t full[24*17];\
1606 uint8_t halfH[272];\
1607 uint8_t halfV[256];\
1608 uint8_t halfHV[256];\
b3184779
MN
1609 copy_block17(full, src, 24, stride, 17);\
1610 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1611 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1612 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1613 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1614}\
0c1a9eda
ZK
1615static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1616 uint8_t full[24*17];\
1617 uint8_t halfH[272];\
1618 uint8_t halfHV[256];\
db794953
MN
1619 copy_block17(full, src, 24, stride, 17);\
1620 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1621 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1622 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1623 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1624}\
0c1a9eda
ZK
1625static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1626 uint8_t halfH[272];\
1627 uint8_t halfHV[256];\
b3184779 1628 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1629 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1630 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1631}\
0c1a9eda
ZK
1632static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1633 uint8_t halfH[272];\
1634 uint8_t halfHV[256];\
b3184779 1635 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1636 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1637 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1638}\
0c1a9eda
ZK
1639void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1640 uint8_t full[24*17];\
1641 uint8_t halfH[272];\
1642 uint8_t halfV[256];\
1643 uint8_t halfHV[256];\
b3184779
MN
1644 copy_block17(full, src, 24, stride, 17);\
1645 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1646 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1647 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1648 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1649}\
0c1a9eda
ZK
1650static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1651 uint8_t full[24*17];\
1652 uint8_t halfH[272];\
db794953
MN
1653 copy_block17(full, src, 24, stride, 17);\
1654 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1655 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1656 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1657}\
0c1a9eda
ZK
1658void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1659 uint8_t full[24*17];\
1660 uint8_t halfH[272];\
1661 uint8_t halfV[256];\
1662 uint8_t halfHV[256];\
b3184779
MN
1663 copy_block17(full, src, 24, stride, 17);\
1664 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1665 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1666 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1667 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1668}\
0c1a9eda
ZK
1669static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1670 uint8_t full[24*17];\
1671 uint8_t halfH[272];\
db794953
MN
1672 copy_block17(full, src, 24, stride, 17);\
1673 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1674 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1675 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1676}\
0c1a9eda
ZK
1677static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1678 uint8_t halfH[272];\
b3184779 1679 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1680 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1681}
44eb4951 1682
b3184779
MN
1683#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1684#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1685#define op_put(a, b) a = cm[((b) + 16)>>5]
1686#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1687
1688QPEL_MC(0, put_ , _ , op_put)
1689QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1690QPEL_MC(0, avg_ , _ , op_avg)
1691//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1692#undef op_avg
1693#undef op_avg_no_rnd
1694#undef op_put
1695#undef op_put_no_rnd
44eb4951 1696
0da71265
MN
1697#if 1
/*
 * H264_LOWPASS(OPNAME, OP, OP2) emits the static filter kernels
 *   OPNAME##h264_qpel{4,8,16}_{h,v,hv}_lowpass.
 *
 * Every kernel applies the 6-tap weights (1, -5, 20, 20, -5, 1):
 *   - _h_  filters along a row and therefore reads src[-2] .. src[N+2];
 *   - _v_  filters along a column and reads rows -2 .. N+2 of src;
 *   - _hv_ filters N+5 rows horizontally into the int16_t scratch buffer
 *          `tmp` (starting two rows above src) and then filters `tmp`
 *          vertically.
 * OP(dst, sum) stores the result of a single pass, OP2(dst, sum) the
 * result of the combined hv pass.  Both may refer to the local table
 * pointer `cm`, which is why each kernel declares it under that name.
 * The vertical passes load all taps of a column before the first store,
 * and the 16-wide kernels are built from four calls to the 8-wide ones.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, x;\
    for(row=0; row<4; row++){\
        for(x=0; x<4; x++){\
            OP(dst[x], (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]));\
        }\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col, k;\
    for(col=0; col<4; col++){\
        int tap[4+5];\
        for(k=0; k<4+5; k++)\
            tap[k]= src[(k-2)*srcStride];\
        for(k=0; k<4; k++){\
            OP(dst[k*dstStride], (tap[k+2]+tap[k+3])*20 - (tap[k+1]+tap[k+4])*5 + (tap[k]+tap[k+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col, k;\
    src -= 2*srcStride;\
    for(row=0; row<4+5; row++){\
        for(k=0; k<4; k++)\
            tmp[k]= (src[k]+src[k+1])*20 - (src[k-1]+src[k+2])*5 + (src[k-2]+src[k+3]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(4+5-2);\
    for(col=0; col<4; col++){\
        int tap[4+5];\
        for(k=0; k<4+5; k++)\
            tap[k]= tmp[(k-2)*tmpStride];\
        for(k=0; k<4; k++){\
            OP2(dst[k*dstStride], (tap[k+2]+tap[k+3])*20 - (tap[k+1]+tap[k+4])*5 + (tap[k]+tap[k+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, x;\
    for(row=0; row<8; row++){\
        for(x=0; x<8; x++){\
            OP(dst[x], (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]));\
        }\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col, k;\
    for(col=0; col<8; col++){\
        int tap[8+5];\
        for(k=0; k<8+5; k++)\
            tap[k]= src[(k-2)*srcStride];\
        for(k=0; k<8; k++){\
            OP(dst[k*dstStride], (tap[k+2]+tap[k+3])*20 - (tap[k+1]+tap[k+4])*5 + (tap[k]+tap[k+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col, k;\
    src -= 2*srcStride;\
    for(row=0; row<8+5; row++){\
        for(k=0; k<8; k++)\
            tmp[k]= (src[k]+src[k+1])*20 - (src[k-1]+src[k+2])*5 + (src[k-2]+src[k+3]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(8+5-2);\
    for(col=0; col<8; col++){\
        int tap[8+5];\
        for(k=0; k<8+5; k++)\
            tap[k]= tmp[(k-2)*tmpStride];\
        for(k=0; k<8; k++){\
            OP2(dst[k*dstStride], (tap[k+2]+tap[k+3])*20 - (tap[k+1]+tap[k+4])*5 + (tap[k]+tap[k+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t * const dstLow= dst + 8*dstStride;\
    uint8_t * const srcLow= src + 8*srcStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst     , src     , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst   +8, src   +8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dstLow  , srcLow  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dstLow+8, srcLow+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t * const dstLow= dst + 8*dstStride;\
    uint8_t * const srcLow= src + 8*srcStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst     , src     , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst   +8, src   +8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dstLow  , srcLow  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dstLow+8, srcLow+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t * const dstLow= dst + 8*dstStride;\
    int16_t * const tmpLow= tmp + 8*tmpStride;\
    uint8_t * const srcLow= src + 8*srcStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst     , tmp     , src     , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst   +8, tmp   +8, src   +8, dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dstLow  , tmpLow  , srcLow  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dstLow+8, tmpLow+8, srcLow+8, dstStride, tmpStride, srcStride);\
}

/*
 * H264_MC(OPNAME, SIZE) emits the sixteen static entry points
 *   OPNAME##h264_qpel##SIZE##_mcXY_c(dst, src, stride),  X, Y in 0..3.
 *
 * They are assembled from the put_h264_qpel##SIZE##_{h,v,hv}_lowpass
 * kernels and the OPNAME##pixels##SIZE##_l2 / _c helpers, all of which are
 * defined elsewhere in this file.  Intermediate planes are SIZE bytes wide;
 * `padded` receives SIZE+5 rows starting two rows above src (optionally
 * shifted one column to the right) and `center` addresses its third row.
 * NOTE(review): the _l2 helper presumably averages its two sources and
 * X/Y presumably are the quarter-sample offsets -- confirm against the
 * helper definitions and the callers.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, hpel, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, hpel, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, center, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, center, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, center+SIZE, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[(SIZE+5)*SIZE];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, scratch, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[(SIZE+5)*SIZE];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, hvpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[(SIZE+5)*SIZE];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, hvpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    int16_t scratch[(SIZE+5)*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vpel, hvpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[(SIZE+5)*SIZE];\
    int16_t scratch[(SIZE+5)*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, center, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vpel, hvpel, stride, SIZE, SIZE, SIZE);\
}

2038#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2039//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2040#define op_put(a, b) a = cm[((b) + 16)>>5]
2041#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2042#define op2_put(a, b) a = cm[((b) + 512)>>10]
2043
2044H264_LOWPASS(put_ , op_put, op2_put)
2045H264_LOWPASS(avg_ , op_avg, op2_avg)
2046H264_MC(put_, 4)
2047H264_MC(put_, 8)
2048H264_MC(put_, 16)
2049H264_MC(avg_, 4)
2050H264_MC(avg_, 8)
2051H264_MC(avg_, 16)
2052
2053#undef op_avg
2054#undef op_put
2055#undef op2_avg
2056#undef op2_put
2057#endif
2058
1457ab52
MN
2059static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2060 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2061 int i;
2062
2063 for(i=0; i<h; i++){
2064 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2065 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2066 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2067 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2068 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2069 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2070 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2071 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2072 dst+=dstStride;
2073 src+=srcStride;
2074 }
2075}
2076
2077static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2078 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2079 int i;
2080
2081 for(i=0; i<w; i++){
2082 const int src_1= src[ -srcStride];
2083 const int src0 = src[0 ];
2084 const int src1 = src[ srcStride];
2085 const int src2 = src[2*srcStride];
2086 const int src3 = src[3*srcStride];
2087 const int src4 = src[4*srcStride];
2088 const int src5 = src[5*srcStride];
2089 const int src6 = src[6*srcStride];
2090 const int src7 = src[7*srcStride];
2091 const int src8 = src[8*srcStride];
2092 const int src9 = src[9*srcStride];
2093 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2094 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2095 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2096 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2097 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2098 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2099 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2100 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2101 src++;
2102 dst++;
2103 }
2104}
2105
/** mspel position (0,0): hands the 8x8 block straight to put_pixels8_c(). */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
2109
/**
 * mspel position (1,0): the horizontally filtered block and the unfiltered
 * source are combined by put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
2115
/** mspel position (2,0): horizontal lowpass written directly to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2119
/**
 * mspel position (3,0): the horizontally filtered block and the source
 * shifted one pixel to the right are combined by put_pixels8_l2().
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, filtered, stride, stride, 8, 8);
}
2125
/** mspel position (0,2): vertical lowpass written directly to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2129
/**
 * mspel position (1,2): combines the vertically filtered source with the
 * horizontally-then-vertically filtered source via put_pixels8_l2().
 * The horizontal pass covers 11 rows starting one row above src, because
 * the vertical filter applied to it reads rows -1 .. 9.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rowsH[11*8];
    uint8_t vOnly[8*8];
    uint8_t hThenV[8*8];

    wmv2_mspel8_v_lowpass(vOnly, src, 8, stride, 8);
    wmv2_mspel8_h_lowpass(rowsH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hThenV, rowsH+8, 8, 8, 8);
    put_pixels8_l2(dst, vOnly, hThenV, stride, 8, 8, 8);
}
/**
 * mspel position (3,2): like position (1,2), but the vertical-only pass
 * starts one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rowsH[11*8];
    uint8_t vOnly[8*8];
    uint8_t hThenV[8*8];

    wmv2_mspel8_v_lowpass(vOnly, src+1, 8, stride, 8);
    wmv2_mspel8_h_lowpass(rowsH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hThenV, rowsH+8, 8, 8, 8);
    put_pixels8_l2(dst, vOnly, hThenV, stride, 8, 8, 8);
}
/**
 * mspel position (2,2): horizontal lowpass over 11 rows (starting one row
 * above src) followed by a vertical lowpass written directly to dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rowsH[11*8];

    wmv2_mspel8_h_lowpass(rowsH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, rowsH+8, stride, 8, 8);
}
2153
2154
/**
 * Sum of absolute differences between two 16x16 pixel blocks.
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between rows (same for both blocks)
 * @return the accumulated |pix1 - pix2| over all 256 positions
 */
static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sum = 0;
    int row, x;

    for(row=0; row<16; row++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2182
/**
 * Sum of absolute differences between a 16x16 block and a reference that
 * is formed by avg2() of each reference pixel and its right neighbour.
 * Reads 17 pixels per reference row.  avg2() is defined elsewhere in this
 * file.
 * @param pix1      top-left pixel of the block
 * @param pix2      top-left pixel of the reference
 * @param line_size distance in bytes between rows (same for both)
 */
static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sum = 0;
    int row, x;

    for(row=0; row<16; row++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2210
/**
 * Sum of absolute differences between a 16x16 block and a reference that
 * is formed by avg2() of each reference pixel and the pixel one row below.
 * Reads 17 reference rows.  avg2() is defined elsewhere in this file.
 * @param pix1      top-left pixel of the block
 * @param pix2      top-left pixel of the reference
 * @param line_size distance in bytes between rows (same for both)
 */
static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row=0; row<16; row++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2240
/**
 * Sum of absolute differences between a 16x16 block and a reference that
 * is formed by avg4() of each 2x2 neighbourhood (pixel, right, below,
 * below-right).  Reads a 17x17 reference area.  avg4() is defined
 * elsewhere in this file.
 * @param pix1      top-left pixel of the block
 * @param pix2      top-left pixel of the reference
 * @param line_size distance in bytes between rows (same for both)
 */
static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row=0; row<16; row++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2270
/**
 * Sum of absolute differences between two 8x8 pixel blocks.
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between rows (same for both blocks)
 * @return the accumulated |pix1 - pix2| over all 64 positions
 */
static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sum = 0;
    int row, x;

    for(row=0; row<8; row++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2290
/**
 * Sum of absolute differences between an 8x8 block and a reference that
 * is formed by avg2() of each reference pixel and its right neighbour.
 * Reads 9 pixels per reference row.  avg2() is defined elsewhere in this
 * file.
 * @param pix1      top-left pixel of the block
 * @param pix2      top-left pixel of the reference
 * @param line_size distance in bytes between rows (same for both)
 */
static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int sum = 0;
    int row, x;

    for(row=0; row<8; row++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2310
/**
 * Sum of absolute differences between an 8x8 block and a reference that
 * is formed by avg2() of each reference pixel and the pixel one row below.
 * Reads 9 reference rows.  avg2() is defined elsewhere in this file.
 * @param pix1      top-left pixel of the block
 * @param pix2      top-left pixel of the reference
 * @param line_size distance in bytes between rows (same for both)
 */
static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row=0; row<8; row++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2332
/**
 * Sum of absolute differences between an 8x8 block and a reference that
 * is formed by avg4() of each 2x2 neighbourhood (pixel, right, below,
 * below-right).  Reads a 9x9 reference area.  avg4() is defined elsewhere
 * in this file.
 * @param pix1      top-left pixel of the block
 * @param pix2      top-left pixel of the reference
 * @param line_size distance in bytes between rows (same for both)
 */
static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row=0; row<8; row++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2354
1457ab52
MN
/**
 * 16x16 sum of absolute differences with a leading context argument.
 * The context pointer s is not used; the call is forwarded to pix_abs16x16_c().
 */
static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    return pix_abs16x16_c(a, b, stride);
}
2358
/**
 * 8x8 sum of absolute differences with a leading context argument.
 * The context pointer s is not used; the call is forwarded to pix_abs8x8_c().
 */
static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    return pix_abs8x8_c(a, b, stride);
}
2362
a9badb51
MN
2363/**
2364 * permutes an 8x8 block.
2a5700de 2365 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
2366 * @param permutation the permutation vector
2367 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
2a5700de
MN
2368 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2369 * (inverse) permutated to scantable order!
a9badb51 2370 */
0c1a9eda 2371void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 2372{
7801d21d 2373 int i;
477ab036 2374 DCTELEM temp[64];
7801d21d
MN
2375
2376 if(last<=0) return;
9a7b310d 2377 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 2378
7801d21d
MN
2379 for(i=0; i<=last; i++){
2380 const int j= scantable[i];
2381 temp[j]= block[j];
2382 block[j]=0;
2383 }
2384
2385 for(i=0; i<=last; i++){
2386 const int j= scantable[i];
2387 const int perm_j= permutation[j];
2388 block[perm_j]= temp[j];
2389 }
d962f6fd 2390}
e0eac44e 2391
2a5700de
MN
2392/**
2393 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
2394 */
eb4b3dd3 2395static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
2396{
2397 memset(blocks, 0, sizeof(DCTELEM)*6*64);
2398}
2399
11f18faf
MN
/**
 * Adds src to dst byte by byte: dst[n] += src[n] for 0 <= n < w.
 * The sums wrap around modulo 256. Nothing is done when w <= 0.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] += src[n];
}
2415
/**
 * Stores the byte-wise difference of two buffers:
 * dst[n] = src1[n] - src2[n] for 0 <= n < w.
 * The differences wrap around modulo 256. Nothing is done when w <= 0.
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] = src1[n] - src2[n];
}
2431
1457ab52
MN
/*
 * Butterfly helpers.
 *
 * BUTTERFLY2 and BUTTERFLY1 are wrapped in do{}while(0) so that each expands
 * to exactly one statement and stays safe inside an unbraced if/else; they
 * must be followed by a semicolon.
 */

/* o1 = i1+i2, o2 = i1-i2; i1 and i2 are evaluated twice, so they must be free of side effects */
#define BUTTERFLY2(o1,o2,i1,i2) \
do{\
    o1= (i1)+(i2);\
    o2= (i1)-(i2);\
}while(0)

/* in-place variant: (x,y) becomes (x+y, x-y); both inputs are read into ints before either is written */
#define BUTTERFLY1(x,y) \
do{\
    const int bfly_a= (x);\
    const int bfly_b= (y);\
    x= bfly_a+bfly_b;\
    y= bfly_a-bfly_b;\
}while(0)

/* |x+y| + |x-y|, i.e. the absolute sum of a final butterfly stage whose outputs are not stored */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
2446
/**
 * Applies an 8x8 butterfly (Hadamard-style) transform to the difference
 * src - dst and returns the sum of the absolute values of the result.
 *
 * @param s      context pointer, not used here
 * @param dst    first 8x8 block (subtracted)
 * @param src    second 8x8 block
 * @param stride offset in bytes from one row to the next (both blocks)
 */
static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
    int temp[64];
    int sum=0;
    int i;

    /* horizontal pass: three butterfly stages over each row of the difference */
    for(i=0; i<8; i++){
        int * const row= temp + 8*i;
        const uint8_t * const src_row= src + stride*i;
        const uint8_t * const dst_row= dst + stride*i;

        BUTTERFLY2(row[0], row[1], src_row[0]-dst_row[0], src_row[1]-dst_row[1]);
        BUTTERFLY2(row[2], row[3], src_row[2]-dst_row[2], src_row[3]-dst_row[3]);
        BUTTERFLY2(row[4], row[5], src_row[4]-dst_row[4], src_row[5]-dst_row[5]);
        BUTTERFLY2(row[6], row[7], src_row[6]-dst_row[6], src_row[7]-dst_row[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: two stored stages, the third one is folded into the absolute sum */
    for(i=0; i<8; i++){
        int * const col= temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}
2496
/**
 * Applies an 8x8 butterfly (Hadamard-style) transform to the block src after
 * subtracting mean from every pixel, and returns the sum of the absolute
 * values of the result.
 *
 * @param src    the 8x8 block
 * @param stride offset in bytes from one row to the next
 * @param mean   value subtracted from each pixel before the transform
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int temp[64];
    int sum=0;
    int i;

    //FIXME OOOPS ignore 0 term instead of mean mess
    /* horizontal pass: three butterfly stages over each row */
    for(i=0; i<8; i++){
        int * const row= temp + 8*i;
        const uint8_t * const src_row= src + stride*i;

        BUTTERFLY2(row[0], row[1], src_row[0]-mean, src_row[1]-mean);
        BUTTERFLY2(row[2], row[3], src_row[2]-mean, src_row[3]-mean);
        BUTTERFLY2(row[4], row[5], src_row[4]-mean, src_row[5]-mean);
        BUTTERFLY2(row[6], row[7], src_row[6]-mean, src_row[7]-mean);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: two stored stages, the third one is folded into the absolute sum */
    for(i=0; i<8; i++){
        int * const col= temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}
2540
2541static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2542 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
2543 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2544 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1457ab52
MN
2545 int sum=0, i;
2546
2547 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 2548 s->dsp.fdct(temp);
1457ab52
MN
2549
2550 for(i=0; i<64; i++)
2551 sum+= ABS(temp[i]);
2552
2553 return sum;
2554}
2555
0e15384d 2556void simple_idct(DCTELEM *block); //FIXME
1457ab52
MN
2557
2558static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2559 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
2560 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
2561 DCTELEM * const temp= (DCTELEM*)aligned_temp;
2562 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
1457ab52
MN
2563 int sum=0, i;
2564
2565 s->mb_intra=0;
2566
2567 s->dsp.diff_pixels(temp, src1, src2, stride);
2568
2569 memcpy(bak, temp, 64*sizeof(DCTELEM));
2570
67725183 2571 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1457ab52
MN
2572 s->dct_unquantize(s, temp, 0, s->qscale);
2573 simple_idct(temp); //FIXME
2574
2575 for(i=0; i<64; i++)
2576 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2577
2578 return sum;
2579}
2580
3a87ac94
MN
2581static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2582 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2583 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
2584 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2585 uint64_t __align8 aligned_bak[stride];
2586 DCTELEM * const temp= (DCTELEM*)aligned_temp;
2587 uint8_t * const bak= (uint8_t*)aligned_bak;
3a87ac94
MN
2588 int i, last, run, bits, level, distoration, start_i;
2589 const int esc_length= s->ac_esc_length;
2590 uint8_t * length;
2591 uint8_t * last_length;
67725183
MN
2592
2593 for(i=0; i<8; i++){
2594 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
2595 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
2596 }
3a87ac94 2597
67725183
MN
2598 s->dsp.diff_pixels(temp, src1, src2, stride);
2599
2600 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2601
2602 bits=0;
3a87ac94
MN
2603
2604 if (s->mb_intra) {
67725183 2605 start_i = 1;
3a87ac94
MN
2606 length = s->intra_ac_vlc_length;
2607 last_length= s->intra_ac_vlc_last_length;
67725183 2608 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2609 } else {
2610 start_i = 0;
2611 length = s->inter_ac_vlc_length;
2612 last_length= s->inter_ac_vlc_last_length;
2613 }
3a87ac94 2614
67725183 2615 if(last>=start_i){
3a87ac94
MN
2616 run=0;
2617 for(i=start_i; i<last; i++){
2618 int j= scantable[i];
2619 level= temp[j];
2620
2621 if(level){
2622 level+=64;
2623 if((level&(~127)) == 0){
2624 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2625 }else
2626 bits+= esc_length;
2627 run=0;
2628 }else
2629 run++;
2630 }
2631 i= scantable[last];
1d0eab1d 2632
3a87ac94 2633 level= temp[i] + 64;
1d0eab1d
MN
2634
2635 assert(level - 64);
2636
3a87ac94
MN
2637 if((level&(~127)) == 0){
2638 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2639 }else
2640 bits+= esc_length;
2641
67725183
MN
2642 }
2643
2644 if(last>=0){
3a87ac94
MN
2645 s->dct_unquantize(s, temp, 0, s->qscale);
2646 }
2647
b0368839 2648 s->dsp.idct_add(bak, stride, temp);
3a87ac94
MN
2649
2650 distoration= s->dsp.sse[1](NULL, bak, src1, stride);
2651
67725183 2652 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
2653}
2654
2655static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2656 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2657 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
2658 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2659 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3a87ac94
MN
2660 int i, last, run, bits, level, start_i;
2661 const int esc_length= s->ac_esc_length;
2662 uint8_t * length;
2663 uint8_t * last_length;
67725183
MN
2664
2665 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 2666
67725183
MN
2667 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2668
2669 bits=0;
3a87ac94
MN
2670
2671 if (s->mb_intra) {
67725183 2672 start_i = 1;
3a87ac94
MN
2673 length = s->intra_ac_vlc_length;
2674 last_length= s->intra_ac_vlc_last_length;
67725183 2675 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2676 } else {
2677 start_i = 0;
2678 length = s->inter_ac_vlc_length;
2679 last_length= s->inter_ac_vlc_last_length;
2680 }
3a87ac94 2681
67725183 2682 if(last>=start_i){
3a87ac94
MN
2683 run=0;
2684 for(i=start_i; i<last; i++){
2685 int j= scantable[i];
2686 level= temp[j];
2687
2688 if(level){
2689 level+=64;
2690 if((level&(~127)) == 0){
2691 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2692 }else
2693 bits+= esc_length;
2694 run=0;
2695 }else
2696 run++;
2697 }
2698 i= scantable[last];
67725183
MN
2699
2700 level= temp[i] + 64;
3a87ac94 2701
67725183 2702 assert(level - 64);
3a87ac94 2703
3a87ac94
MN
2704 if((level&(~127)) == 0){
2705 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2706 }else
2707 bits+= esc_length;
2708 }
2709
2710 return bits;
2711}
2712
2713
1457ab52
MN
/*
 * Instantiate the 16x16 comparison functions from their 8x8 counterparts.
 * NOTE(review): WARPER88_1616 is defined outside this section; presumably it
 * emits a function named by its second argument that applies the 8x8 function
 * named by its first argument over a 16x16 area -- confirm against the macro.
 */
WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
WARPER88_1616(rd8x8_c, rd16x16_c)
WARPER88_1616(bit8x8_c, bit16x16_c)
1457ab52 2719
b0368839
MN
2720/* XXX: those functions should be suppressed ASAP when all IDCTs are
2721 converted */
2722static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2723{
2724 j_rev_dct (block);
2725 put_pixels_clamped_c(block, dest, line_size);
2726}
2727static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2728{
2729 j_rev_dct (block);
2730 add_pixels_clamped_c(block, dest, line_size);
2731}
2732
59cf08ce
FB
2733/* init static data */
2734void dsputil_static_init(void)
e0eac44e 2735{
d2975f8d 2736 int i;
e0eac44e 2737
59cf08ce
FB
2738 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
2739 for(i=0;i<MAX_NEG_CROP;i++) {
2740 cropTbl[i] = 0;
2741 cropTbl[i + MAX_NEG_CROP + 256] = 255;
2742 }
2743
2744 for(i=0;i<512;i++) {
2745 squareTbl[i] = (i - 256) * (i - 256);
2746 }
2747
2748 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2749}
92ddb692 2750
92ddb692 2751
59cf08ce
FB
2752void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2753{
2754 int i;
de6d9b64 2755
b0368839
MN
2756#ifdef CONFIG_ENCODERS
2757 if(avctx->dct_algo==FF_DCT_FASTINT)
2758 c->fdct = fdct_ifast;
2759 else
2760 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
2761#endif //CONFIG_ENCODERS
2762
2763 if(avctx->idct_algo==FF_IDCT_INT){
2764 c->idct_put= ff_jref_idct_put;
2765 c->idct_add= ff_jref_idct_add;
2766 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2767 }else{ //accurate/default
2768 c->idct_put= simple_idct_put;
2769 c->idct_add= simple_idct_add;
2770 c->idct_permutation_type= FF_NO_IDCT_PERM;
2771 }
2772
eb4b3dd3
ZK
2773 c->get_pixels = get_pixels_c;
2774 c->diff_pixels = diff_pixels_c;
2775 c->put_pixels_clamped = put_pixels_clamped_c;
2776 c->add_pixels_clamped = add_pixels_clamped_c;
2777 c->gmc1 = gmc1_c;
2778 c->gmc = gmc_c;
2779 c->clear_blocks = clear_blocks_c;
2780 c->pix_sum = pix_sum_c;
2781 c->pix_norm1 = pix_norm1_c;
1457ab52
MN
2782 c->sse[0]= sse16_c;
2783 c->sse[1]= sse8_c;
eb4b3dd3 2784
45553457 2785 /* TODO [0] 16 [1] 8 */
eb4b3dd3
ZK
2786 c->pix_abs16x16 = pix_abs16x16_c;
2787 c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
2788 c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
2789 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
2790 c->pix_abs8x8 = pix_abs8x8_c;
2791 c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
2792 c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
2793 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
2794
45553457
ZK
2795#define dspfunc(PFX, IDX, NUM) \
2796 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
2797 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
2798 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
2799 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
2800
2801 dspfunc(put, 0, 16);
2802 dspfunc(put_no_rnd, 0, 16);
2803 dspfunc(put, 1, 8);
2804 dspfunc(put_no_rnd, 1, 8);
669ac79c
MN
2805 dspfunc(put, 2, 4);
2806 dspfunc(put, 3, 2);
45553457
ZK
2807
2808 dspfunc(avg, 0, 16);
2809 dspfunc(avg_no_rnd, 0, 16);
2810 dspfunc(avg, 1, 8);
2811 dspfunc(avg_no_rnd, 1, 8);
2812#undef dspfunc
2813
669ac79c
MN
2814 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2815 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2816 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2817 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2818 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2819 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2820 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2821 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2822 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2823
45553457
ZK
2824#define dspfunc(PFX, IDX, NUM) \
2825 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2826 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2827 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2828 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2829 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2830 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2831 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2832 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2833 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2834 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2835 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2836 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2837 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2838 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2839 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2840 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2841
2842 dspfunc(put_qpel, 0, 16);
2843 dspfunc(put_no_rnd_qpel, 0, 16);
2844
2845 dspfunc(avg_qpel, 0, 16);
2846 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2847
2848 dspfunc(put_qpel, 1, 8);
2849 dspfunc(put_no_rnd_qpel, 1, 8);
2850
2851 dspfunc(avg_qpel, 1, 8);
2852 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265
MN
2853
2854 dspfunc(put_h264_qpel, 0, 16);
2855 dspfunc(put_h264_qpel, 1, 8);
2856 dspfunc(put_h264_qpel, 2, 4);
2857 dspfunc(avg_h264_qpel, 0, 16);
2858 dspfunc(avg_h264_qpel, 1, 8);
2859 dspfunc(avg_h264_qpel, 2, 4);
2860
45553457 2861#undef dspfunc
0da71265
MN
2862 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
2863 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
2864 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
2865 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
2866 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
2867 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
c9a2ebc4 2868
1457ab52
MN
2869 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
2870 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2871 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2872 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2873 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2874 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2875 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2876 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
669ac79c 2877
1457ab52
MN
2878 c->hadamard8_diff[0]= hadamard8_diff16_c;
2879 c->hadamard8_diff[1]= hadamard8_diff_c;
2880 c->hadamard8_abs = hadamard8_abs_c;
2881
2882 c->dct_sad[0]= dct_sad16x16_c;
2883 c->dct_sad[1]= dct_sad8x8_c;
2884
2885 c->sad[0]= sad16x16_c;
2886 c->sad[1]= sad8x8_c;
2887
2888 c->quant_psnr[0]= quant_psnr16x16_c;
2889 c->quant_psnr[1]= quant_psnr8x8_c;
3a87ac94
MN
2890
2891 c->rd[0]= rd16x16_c;
2892 c->rd[1]= rd8x8_c;
2893
2894 c->bit[0]= bit16x16_c;
2895 c->bit[1]= bit8x8_c;
2896
11f18faf
MN
2897 c->add_bytes= add_bytes_c;
2898 c->diff_bytes= diff_bytes_c;
3d2e8cce 2899 c->bswap_buf= bswap_buf;
11f18faf 2900
980fc7b8 2901#ifdef HAVE_MMX
b0368839 2902 dsputil_init_mmx(c, avctx);
de6d9b64 2903#endif
3d03c0a2 2904#ifdef ARCH_ARMV4L
b0368839 2905 dsputil_init_armv4l(c, avctx);
3d03c0a2 2906#endif
c34270f5 2907#ifdef HAVE_MLIB
b0368839 2908 dsputil_init_mlib(c, avctx);
c34270f5 2909#endif
1e98dffb 2910#ifdef ARCH_ALPHA
b0368839 2911 dsputil_init_alpha(c, avctx);
1e98dffb 2912#endif
59925ef2 2913#ifdef ARCH_POWERPC
b0368839 2914 dsputil_init_ppc(c, avctx);
a43bd1d7 2915#endif
d46aba26 2916#ifdef HAVE_MMI
b0368839 2917 dsputil_init_mmi(c, avctx);
d46aba26 2918#endif
0c6bd2ea
B
2919#ifdef ARCH_SH4
2920 dsputil_init_sh4(c,avctx);
2921#endif
43f1708f 2922
b0368839
MN
2923 switch(c->idct_permutation_type){
2924 case FF_NO_IDCT_PERM:
2925 for(i=0; i<64; i++)
2926 c->idct_permutation[i]= i;
2927 break;
2928 case FF_LIBMPEG2_IDCT_PERM:
2929 for(i=0; i<64; i++)
2930 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
2931 break;
2932 case FF_SIMPLE_IDCT_PERM:
2933 for(i=0; i<64; i++)
2934 c->idct_permutation[i]= simple_mmx_permutation[i];
2935 break;
2936 case FF_TRANSPOSE_IDCT_PERM:
2937 for(i=0; i<64; i++)
2938 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
2939 break;
2940 default:
2941 fprintf(stderr, "Internal error, IDCT permutation not set\n");
2942 }
57060b1e 2943}
b0368839 2944