fix subtle logic problem in block unpacker that leads to incorrect token
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
ff4ec49e 3 * Copyright (c) 2000, 2001 Fabrice Bellard.
de6d9b64 4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
de6d9b64 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
de6d9b64 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
de6d9b64 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7ff037e9 18 *
59fe111e 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
de6d9b64 20 */
983e3246
MN
21
22/**
23 * @file dsputil.c
24 * DSP utils
25 */
26
de6d9b64
FB
27#include "avcodec.h"
28#include "dsputil.h"
1457ab52 29#include "mpegvideo.h"
b0368839 30#include "simple_idct.h"
45553457 31
5596c60c 32
0c1a9eda
ZK
/* Lookup table used for clamping: the functions below index it through
   (cropTbl + MAX_NEG_CROP), so indices from -MAX_NEG_CROP up to
   255 + MAX_NEG_CROP are in range. Its contents are not set in the part of
   the file shown here -- presumably filled in by an init routine. */
33uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
/* Lookup table used for squaring: the functions below index it through
   (squareTbl + 256), so indices -256..255 are in range. Its contents are not
   set in the part of the file shown here -- presumably (i-256)^2; confirm. */
34uint32_t squareTbl[512];
de6d9b64 35
/* Zigzag scan order of an 8x8 block: entry i is the position (row*8 + column)
   of the i-th coefficient in scan order. */
0c1a9eda 36const uint8_t ff_zigzag_direct[64] = {
2ad1516a
MN
37 0, 1, 8, 16, 9, 2, 3, 10,
38 17, 24, 32, 25, 18, 11, 4, 5,
e0eac44e 39 12, 19, 26, 33, 40, 48, 41, 34,
2ad1516a 40 27, 20, 13, 6, 7, 14, 21, 28,
e0eac44e
FB
41 35, 42, 49, 56, 57, 50, 43, 36,
42 29, 22, 15, 23, 30, 37, 44, 51,
43 58, 59, 52, 45, 38, 31, 39, 46,
44 53, 60, 61, 54, 47, 55, 62, 63
45};
46
2f349de2 47/* inverse of the non-permuted zigzag_direct table, plus 1; for the MMX quantizer */
0c1a9eda 48uint16_t __align8 inv_zigzag_direct16[64];
2f349de2 49
/* Alternate (horizontal) scan order of an 8x8 block, same layout as
   ff_zigzag_direct: entry i is the block position of the i-th coefficient. */
0c1a9eda 50const uint8_t ff_alternate_horizontal_scan[64] = {
2ad1516a 51 0, 1, 2, 3, 8, 9, 16, 17,
e0eac44e
FB
52 10, 11, 4, 5, 6, 7, 15, 14,
53 13, 12, 19, 18, 24, 25, 32, 33,
54 26, 27, 20, 21, 22, 23, 28, 29,
55 30, 31, 34, 35, 40, 41, 48, 49,
56 42, 43, 36, 37, 38, 39, 44, 45,
57 46, 47, 50, 51, 56, 57, 58, 59,
58 52, 53, 54, 55, 60, 61, 62, 63,
59};
60
/* Alternate (vertical) scan order of an 8x8 block, same layout as
   ff_zigzag_direct: entry i is the block position of the i-th coefficient. */
0c1a9eda 61const uint8_t ff_alternate_vertical_scan[64] = {
2ad1516a 62 0, 8, 16, 24, 1, 9, 2, 10,
e0eac44e
FB
63 17, 25, 32, 40, 48, 56, 57, 49,
64 41, 33, 26, 18, 3, 11, 4, 12,
65 19, 27, 34, 42, 50, 58, 35, 43,
66 51, 59, 20, 28, 5, 13, 6, 14,
67 21, 29, 36, 44, 52, 60, 37, 45,
68 53, 61, 22, 30, 7, 15, 23, 31,
69 38, 46, 54, 62, 39, 47, 55, 63,
70};
71
2f349de2 72/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
/* Reciprocal table: inverse[b] is 2^32/b rounded up. inverse[1] is clamped to
   2^32-1 so that it fits in 32 bits and inverse[0] is a dummy 0, which is why
   the identity above is only stated for b >= 2. The product a*inverse[b] does
   not fit in 32 bits, so it has to be computed in a 64-bit type. */
0c1a9eda 73const uint32_t inverse[256]={
2f349de2
MN
74 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
75 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
76 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
77 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
78 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
79 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
80 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
81 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
82 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
83 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
84 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
85 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
86 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
87 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
88 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
89 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
90 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
91 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
92 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
93 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
94 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
95 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
96 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
97 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
98 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
99 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
100 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
101 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
102 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
103 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
104 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
105 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
106};
107
b0368839
MN
108/* Input permutation for the simple_idct_mmx */
/* Every value 0x00..0x3F occurs exactly once, i.e. this is a permutation of
   the 64 coefficient positions. */
109static const uint8_t simple_mmx_permutation[64]={
110 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
111 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
112 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
113 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
114 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
115 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
116 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
117 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
118};
119
/**
 * Sum the pixel values of a 16x16 block.
 * @param pix       top-left pixel of the block
 * @param line_size offset in bytes between the starts of consecutive rows
 * @return the sum of all 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
141
0c1a9eda 142static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
143{
144 int s, i, j;
0c1a9eda 145 uint32_t *sq = squareTbl + 256;
3aa102be
MN
146
147 s = 0;
148 for (i = 0; i < 16; i++) {
149 for (j = 0; j < 16; j += 8) {
2a006cd3 150#if 0
3aa102be
MN
151 s += sq[pix[0]];
152 s += sq[pix[1]];
153 s += sq[pix[2]];
154 s += sq[pix[3]];
155 s += sq[pix[4]];
156 s += sq[pix[5]];
157 s += sq[pix[6]];
158 s += sq[pix[7]];
2a006cd3
FL
159#else
160#if LONG_MAX > 2147483647
161 register uint64_t x=*(uint64_t*)pix;
162 s += sq[x&0xff];
163 s += sq[(x>>8)&0xff];
164 s += sq[(x>>16)&0xff];
165 s += sq[(x>>24)&0xff];
166 s += sq[(x>>32)&0xff];
167 s += sq[(x>>40)&0xff];
168 s += sq[(x>>48)&0xff];
169 s += sq[(x>>56)&0xff];
170#else
171 register uint32_t x=*(uint32_t*)pix;
172 s += sq[x&0xff];
173 s += sq[(x>>8)&0xff];
174 s += sq[(x>>16)&0xff];
175 s += sq[(x>>24)&0xff];
176 x=*(uint32_t*)(pix+4);
177 s += sq[x&0xff];
178 s += sq[(x>>8)&0xff];
179 s += sq[(x>>16)&0xff];
180 s += sq[(x>>24)&0xff];
181#endif
182#endif
3aa102be
MN
183 pix += 8;
184 }
185 pix += line_size - 16;
186 }
187 return s;
188}
189
190
0c1a9eda 191static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
1457ab52
MN
192{
193 int s, i;
0c1a9eda 194 uint32_t *sq = squareTbl + 256;
1457ab52
MN
195
196 s = 0;
197 for (i = 0; i < 8; i++) {
198 s += sq[pix1[0] - pix2[0]];
199 s += sq[pix1[1] - pix2[1]];
200 s += sq[pix1[2] - pix2[2]];
201 s += sq[pix1[3] - pix2[3]];
202 s += sq[pix1[4] - pix2[4]];
203 s += sq[pix1[5] - pix2[5]];
204 s += sq[pix1[6] - pix2[6]];
205 s += sq[pix1[7] - pix2[7]];
206 pix1 += line_size;
207 pix2 += line_size;
208 }
209 return s;
210}
211
6b026927 212static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
9c76bd48 213{
6b026927
FH
214 int s, i;
215 uint32_t *sq = squareTbl + 256;
9c76bd48
BF
216
217 s = 0;
218 for (i = 0; i < 16; i++) {
6b026927
FH
219 s += sq[pix1[ 0] - pix2[ 0]];
220 s += sq[pix1[ 1] - pix2[ 1]];
221 s += sq[pix1[ 2] - pix2[ 2]];
222 s += sq[pix1[ 3] - pix2[ 3]];
223 s += sq[pix1[ 4] - pix2[ 4]];
224 s += sq[pix1[ 5] - pix2[ 5]];
225 s += sq[pix1[ 6] - pix2[ 6]];
226 s += sq[pix1[ 7] - pix2[ 7]];
227 s += sq[pix1[ 8] - pix2[ 8]];
228 s += sq[pix1[ 9] - pix2[ 9]];
229 s += sq[pix1[10] - pix2[10]];
230 s += sq[pix1[11] - pix2[11]];
231 s += sq[pix1[12] - pix2[12]];
232 s += sq[pix1[13] - pix2[13]];
233 s += sq[pix1[14] - pix2[14]];
234 s += sq[pix1[15] - pix2[15]];
2a006cd3 235
6b026927
FH
236 pix1 += line_size;
237 pix2 += line_size;
9c76bd48
BF
238 }
239 return s;
240}
241
0c1a9eda 242static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 243{
de6d9b64
FB
244 int i;
245
246 /* read the pixels */
de6d9b64 247 for(i=0;i<8;i++) {
c13e1abd
FH
248 block[0] = pixels[0];
249 block[1] = pixels[1];
250 block[2] = pixels[2];
251 block[3] = pixels[3];
252 block[4] = pixels[4];
253 block[5] = pixels[5];
254 block[6] = pixels[6];
255 block[7] = pixels[7];
256 pixels += line_size;
257 block += 8;
de6d9b64
FB
258 }
259}
260
0c1a9eda
ZK
261static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
262 const uint8_t *s2, int stride){
9dbcbd92
MN
263 int i;
264
265 /* read the pixels */
9dbcbd92 266 for(i=0;i<8;i++) {
c13e1abd
FH
267 block[0] = s1[0] - s2[0];
268 block[1] = s1[1] - s2[1];
269 block[2] = s1[2] - s2[2];
270 block[3] = s1[3] - s2[3];
271 block[4] = s1[4] - s2[4];
272 block[5] = s1[5] - s2[5];
273 block[6] = s1[6] - s2[6];
274 block[7] = s1[7] - s2[7];
9dbcbd92
MN
275 s1 += stride;
276 s2 += stride;
c13e1abd 277 block += 8;
9dbcbd92
MN
278 }
279}
280
281
0c1a9eda 282static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
eb4b3dd3 283 int line_size)
de6d9b64 284{
de6d9b64 285 int i;
0c1a9eda 286 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
287
288 /* read the pixels */
de6d9b64 289 for(i=0;i<8;i++) {
c13e1abd
FH
290 pixels[0] = cm[block[0]];
291 pixels[1] = cm[block[1]];
292 pixels[2] = cm[block[2]];
293 pixels[3] = cm[block[3]];
294 pixels[4] = cm[block[4]];
295 pixels[5] = cm[block[5]];
296 pixels[6] = cm[block[6]];
297 pixels[7] = cm[block[7]];
298
299 pixels += line_size;
300 block += 8;
de6d9b64
FB
301 }
302}
303
0c1a9eda 304static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 305 int line_size)
de6d9b64 306{
de6d9b64 307 int i;
0c1a9eda 308 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
309
310 /* read the pixels */
de6d9b64 311 for(i=0;i<8;i++) {
c13e1abd
FH
312 pixels[0] = cm[pixels[0] + block[0]];
313 pixels[1] = cm[pixels[1] + block[1]];
314 pixels[2] = cm[pixels[2] + block[2]];
315 pixels[3] = cm[pixels[3] + block[3]];
316 pixels[4] = cm[pixels[4] + block[4]];
317 pixels[5] = cm[pixels[5] + block[5]];
318 pixels[6] = cm[pixels[6] + block[6]];
319 pixels[7] = cm[pixels[7] + block[7]];
320 pixels += line_size;
321 block += 8;
de6d9b64
FB
322 }
323}
59fe111e
MN
/*
 * Disabled (#if 0) variant of PIXOP2 that processes 8 pixels at a time with
 * 64-bit loads (LD64) and 64-bit stores.
 *
 * - x2 / y2: per-byte average of two rows of pixels; (a|b) - (((a^b)&0xFE..)>>1)
 *   rounds up, (a&b) + (((a^b)&0xFE..)>>1) rounds down (the "no_rnd" forms).
 *   The 0xFE mask keeps the shift from leaking bits into the neighbouring byte.
 * - xy2: per-byte average of four pixels. Each byte is split into its low
 *   2 bits (summed in l0/l1 together with the rounding constant, 0x02 per byte
 *   or 0x01 for "no_rnd") and its high 6 bits (summed pre-shifted in h0/h1).
 *   Two output rows are written per loop iteration.
 *
 * NOTE(review): this branch defines OPNAME ## _pixels, but the CALL_2X_PIXELS
 * lines at its end refer to OPNAME ## _pixels_c, so it presumably would not
 * build if it were enabled as is.
 */
324#if 0
325
326#define PIXOP2(OPNAME, OP) \
b3184779 327static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
328{\
329 int i;\
330 for(i=0; i<h; i++){\
331 OP(*((uint64_t*)block), LD64(pixels));\
332 pixels+=line_size;\
333 block +=line_size;\
334 }\
335}\
336\
45553457 337static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
338{\
339 int i;\
340 for(i=0; i<h; i++){\
341 const uint64_t a= LD64(pixels );\
342 const uint64_t b= LD64(pixels+1);\
343 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
344 pixels+=line_size;\
345 block +=line_size;\
346 }\
347}\
348\
45553457 349static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
350{\
351 int i;\
352 for(i=0; i<h; i++){\
353 const uint64_t a= LD64(pixels );\
354 const uint64_t b= LD64(pixels+1);\
355 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
356 pixels+=line_size;\
357 block +=line_size;\
358 }\
359}\
360\
45553457 361static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
362{\
363 int i;\
364 for(i=0; i<h; i++){\
365 const uint64_t a= LD64(pixels );\
366 const uint64_t b= LD64(pixels+line_size);\
367 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
368 pixels+=line_size;\
369 block +=line_size;\
370 }\
371}\
372\
45553457 373static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
374{\
375 int i;\
376 for(i=0; i<h; i++){\
377 const uint64_t a= LD64(pixels );\
378 const uint64_t b= LD64(pixels+line_size);\
379 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
380 pixels+=line_size;\
381 block +=line_size;\
382 }\
383}\
384\
45553457 385static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
386{\
387 int i;\
388 const uint64_t a= LD64(pixels );\
389 const uint64_t b= LD64(pixels+1);\
390 uint64_t l0= (a&0x0303030303030303ULL)\
391 + (b&0x0303030303030303ULL)\
392 + 0x0202020202020202ULL;\
393 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
394 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
395 uint64_t l1,h1;\
396\
397 pixels+=line_size;\
398 for(i=0; i<h; i+=2){\
399 uint64_t a= LD64(pixels );\
400 uint64_t b= LD64(pixels+1);\
401 l1= (a&0x0303030303030303ULL)\
402 + (b&0x0303030303030303ULL);\
403 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
404 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
405 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
406 pixels+=line_size;\
407 block +=line_size;\
408 a= LD64(pixels );\
409 b= LD64(pixels+1);\
410 l0= (a&0x0303030303030303ULL)\
411 + (b&0x0303030303030303ULL)\
412 + 0x0202020202020202ULL;\
413 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
414 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
415 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
416 pixels+=line_size;\
417 block +=line_size;\
418 }\
419}\
420\
45553457 421static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
422{\
423 int i;\
424 const uint64_t a= LD64(pixels );\
425 const uint64_t b= LD64(pixels+1);\
426 uint64_t l0= (a&0x0303030303030303ULL)\
427 + (b&0x0303030303030303ULL)\
428 + 0x0101010101010101ULL;\
429 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
430 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
431 uint64_t l1,h1;\
432\
433 pixels+=line_size;\
434 for(i=0; i<h; i+=2){\
435 uint64_t a= LD64(pixels );\
436 uint64_t b= LD64(pixels+1);\
437 l1= (a&0x0303030303030303ULL)\
438 + (b&0x0303030303030303ULL);\
439 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
440 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
441 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
442 pixels+=line_size;\
443 block +=line_size;\
444 a= LD64(pixels );\
445 b= LD64(pixels+1);\
446 l0= (a&0x0303030303030303ULL)\
447 + (b&0x0303030303030303ULL)\
448 + 0x0101010101010101ULL;\
449 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
450 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
451 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
452 pixels+=line_size;\
453 block +=line_size;\
454 }\
455}\
456\
45553457
ZK
457CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
458CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
459CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
460CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
461CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
462CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
463CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
464
465#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
466#else // end of the disabled 64 bit variant; the 32 bit variant follows
467
/*
 * PIXOP2(OPNAME, OP): generates a family of static pixel functions whose names
 * start with OPNAME. OP(dst, value) is a statement-like macro that combines
 * "value" into the lvalue "dst"; this file instantiates the family with
 * op_put and op_avg.
 *
 * Generated functions (N = block width in pixels, h = number of rows):
 * - _pixelsN_c: apply OP to h rows of N pixels, using LD16/LD32 loads.
 * - _pixelsN_l2 / _no_rnd_pixels8_l2: apply OP to the per-byte average of two
 *   sources, computed with rnd_avg32() / no_rnd_avg32(). Each source and the
 *   destination has its own stride.
 * - _pixelsN_x2_c / _y2_c: the _l2 function applied to the source and the
 *   source shifted by one pixel (x2) or by one line (y2).
 * - _pixels8_l4 / _no_rnd_pixels8_l4: per-byte average of four sources. Each
 *   byte is split into its low 2 bits (summed in l0/l1 together with the
 *   rounding constant, 0x02 per byte or 0x01 for "no_rnd") and its high 6 bits
 *   (summed pre-shifted in h0/h1); the result is h0+h1+(((l0+l1)>>2)&0x0F..).
 * - _pixelsN_xy2_c: the same four-pixel average over a 2x2 neighbourhood.
 *   These loops advance by two rows per iteration (i+=2), so an odd h makes
 *   them write h+1 rows. The 8-wide versions run the 4-wide computation twice
 *   (j loop) and rewind the pointers in between.
 * - _pixels2_xy2_c stores its result directly instead of going through OP
 *   (see the FIXME inside), so it behaves as "put" for every OPNAME.
 * - The 16-wide entry points are produced by CALL_2X_PIXELS from the 8-wide
 *   ones (macro defined outside this file; presumably it calls the 8-wide
 *   function twice, 8 bytes apart). _no_rnd_pixels16_c is built from the plain
 *   _pixels8_c.
 */
468#define PIXOP2(OPNAME, OP) \
669ac79c
MN
469static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
470 int i;\
471 for(i=0; i<h; i++){\
472 OP(*((uint16_t*)(block )), LD16(pixels ));\
473 pixels+=line_size;\
474 block +=line_size;\
475 }\
476}\
0da71265
MN
477static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
478 int i;\
479 for(i=0; i<h; i++){\
480 OP(*((uint32_t*)(block )), LD32(pixels ));\
481 pixels+=line_size;\
482 block +=line_size;\
483 }\
484}\
45553457 485static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
59fe111e
MN
486 int i;\
487 for(i=0; i<h; i++){\
488 OP(*((uint32_t*)(block )), LD32(pixels ));\
489 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
490 pixels+=line_size;\
491 block +=line_size;\
492 }\
493}\
45553457
ZK
494static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
495 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
b3184779 496}\
59fe111e 497\
b3184779
MN
498static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
499 int src_stride1, int src_stride2, int h){\
59fe111e
MN
500 int i;\
501 for(i=0; i<h; i++){\
b3184779
MN
502 uint32_t a,b;\
503 a= LD32(&src1[i*src_stride1 ]);\
504 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 505 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
b3184779
MN
506 a= LD32(&src1[i*src_stride1+4]);\
507 b= LD32(&src2[i*src_stride2+4]);\
d8085ea7 508 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
59fe111e
MN
509 }\
510}\
511\
b3184779
MN
512static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
513 int src_stride1, int src_stride2, int h){\
59fe111e
MN
514 int i;\
515 for(i=0; i<h; i++){\
b3184779
MN
516 uint32_t a,b;\
517 a= LD32(&src1[i*src_stride1 ]);\
518 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 519 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
b3184779
MN
520 a= LD32(&src1[i*src_stride1+4]);\
521 b= LD32(&src2[i*src_stride2+4]);\
d8085ea7 522 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
59fe111e
MN
523 }\
524}\
525\
0da71265
MN
526static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
527 int src_stride1, int src_stride2, int h){\
528 int i;\
529 for(i=0; i<h; i++){\
530 uint32_t a,b;\
531 a= LD32(&src1[i*src_stride1 ]);\
532 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 533 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
0da71265
MN
534 }\
535}\
536\
669ac79c
MN
537static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
538 int src_stride1, int src_stride2, int h){\
539 int i;\
540 for(i=0; i<h; i++){\
541 uint32_t a,b;\
542 a= LD16(&src1[i*src_stride1 ]);\
543 b= LD16(&src2[i*src_stride2 ]);\
544 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
545 }\
546}\
547\
b3184779
MN
548static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
549 int src_stride1, int src_stride2, int h){\
550 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
551 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
552}\
553\
554static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
555 int src_stride1, int src_stride2, int h){\
556 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
557 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
558}\
559\
45553457 560static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
561 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
562}\
563\
45553457 564static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
565 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
566}\
567\
45553457 568static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
569 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
570}\
571\
45553457 572static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
573 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
574}\
575\
576static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
577 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
578 int i;\
579 for(i=0; i<h; i++){\
b3184779
MN
580 uint32_t a, b, c, d, l0, l1, h0, h1;\
581 a= LD32(&src1[i*src_stride1]);\
582 b= LD32(&src2[i*src_stride2]);\
583 c= LD32(&src3[i*src_stride3]);\
584 d= LD32(&src4[i*src_stride4]);\
585 l0= (a&0x03030303UL)\
586 + (b&0x03030303UL)\
587 + 0x02020202UL;\
588 h0= ((a&0xFCFCFCFCUL)>>2)\
589 + ((b&0xFCFCFCFCUL)>>2);\
590 l1= (c&0x03030303UL)\
591 + (d&0x03030303UL);\
592 h1= ((c&0xFCFCFCFCUL)>>2)\
593 + ((d&0xFCFCFCFCUL)>>2);\
594 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
595 a= LD32(&src1[i*src_stride1+4]);\
596 b= LD32(&src2[i*src_stride2+4]);\
597 c= LD32(&src3[i*src_stride3+4]);\
598 d= LD32(&src4[i*src_stride4+4]);\
599 l0= (a&0x03030303UL)\
600 + (b&0x03030303UL)\
601 + 0x02020202UL;\
602 h0= ((a&0xFCFCFCFCUL)>>2)\
603 + ((b&0xFCFCFCFCUL)>>2);\
604 l1= (c&0x03030303UL)\
605 + (d&0x03030303UL);\
606 h1= ((c&0xFCFCFCFCUL)>>2)\
607 + ((d&0xFCFCFCFCUL)>>2);\
608 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
609 }\
610}\
669ac79c
MN
611\
612static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
613 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
614}\
615\
616static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
617 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
618}\
619\
620static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
621 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
622}\
623\
624static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
625 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
626}\
627\
b3184779
MN
628static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
629 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
630 int i;\
631 for(i=0; i<h; i++){\
b3184779
MN
632 uint32_t a, b, c, d, l0, l1, h0, h1;\
633 a= LD32(&src1[i*src_stride1]);\
634 b= LD32(&src2[i*src_stride2]);\
635 c= LD32(&src3[i*src_stride3]);\
636 d= LD32(&src4[i*src_stride4]);\
637 l0= (a&0x03030303UL)\
638 + (b&0x03030303UL)\
639 + 0x01010101UL;\
640 h0= ((a&0xFCFCFCFCUL)>>2)\
641 + ((b&0xFCFCFCFCUL)>>2);\
642 l1= (c&0x03030303UL)\
643 + (d&0x03030303UL);\
644 h1= ((c&0xFCFCFCFCUL)>>2)\
645 + ((d&0xFCFCFCFCUL)>>2);\
646 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
647 a= LD32(&src1[i*src_stride1+4]);\
648 b= LD32(&src2[i*src_stride2+4]);\
649 c= LD32(&src3[i*src_stride3+4]);\
650 d= LD32(&src4[i*src_stride4+4]);\
651 l0= (a&0x03030303UL)\
652 + (b&0x03030303UL)\
653 + 0x01010101UL;\
654 h0= ((a&0xFCFCFCFCUL)>>2)\
655 + ((b&0xFCFCFCFCUL)>>2);\
656 l1= (c&0x03030303UL)\
657 + (d&0x03030303UL);\
658 h1= ((c&0xFCFCFCFCUL)>>2)\
659 + ((d&0xFCFCFCFCUL)>>2);\
660 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
661 }\
662}\
b3184779
MN
663static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
664 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
665 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
666 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
667}\
668static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
669 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
670 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
671 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
672}\
59fe111e 673\
669ac79c
MN
674static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
675{\
676 int i, a0, b0, a1, b1;\
677 a0= pixels[0];\
678 b0= pixels[1] + 2;\
679 a0 += b0;\
680 b0 += pixels[2];\
681\
682 pixels+=line_size;\
683 for(i=0; i<h; i+=2){\
684 a1= pixels[0];\
685 b1= pixels[1];\
686 a1 += b1;\
687 b1 += pixels[2];\
688\
689 block[0]= (a1+a0)>>2; /* FIXME non put */\
690 block[1]= (b1+b0)>>2;\
691\
692 pixels+=line_size;\
693 block +=line_size;\
694\
695 a0= pixels[0];\
696 b0= pixels[1] + 2;\
697 a0 += b0;\
698 b0 += pixels[2];\
699\
700 block[0]= (a1+a0)>>2;\
701 block[1]= (b1+b0)>>2;\
702 pixels+=line_size;\
703 block +=line_size;\
704 }\
705}\
706\
707static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
708{\
709 int i;\
710 const uint32_t a= LD32(pixels );\
711 const uint32_t b= LD32(pixels+1);\
712 uint32_t l0= (a&0x03030303UL)\
713 + (b&0x03030303UL)\
714 + 0x02020202UL;\
715 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
716 + ((b&0xFCFCFCFCUL)>>2);\
717 uint32_t l1,h1;\
718\
719 pixels+=line_size;\
720 for(i=0; i<h; i+=2){\
721 uint32_t a= LD32(pixels );\
722 uint32_t b= LD32(pixels+1);\
723 l1= (a&0x03030303UL)\
724 + (b&0x03030303UL);\
725 h1= ((a&0xFCFCFCFCUL)>>2)\
726 + ((b&0xFCFCFCFCUL)>>2);\
727 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
728 pixels+=line_size;\
729 block +=line_size;\
730 a= LD32(pixels );\
731 b= LD32(pixels+1);\
732 l0= (a&0x03030303UL)\
733 + (b&0x03030303UL)\
734 + 0x02020202UL;\
735 h0= ((a&0xFCFCFCFCUL)>>2)\
736 + ((b&0xFCFCFCFCUL)>>2);\
737 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
738 pixels+=line_size;\
739 block +=line_size;\
740 }\
741}\
742\
45553457 743static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
744{\
745 int j;\
746 for(j=0; j<2; j++){\
747 int i;\
748 const uint32_t a= LD32(pixels );\
749 const uint32_t b= LD32(pixels+1);\
750 uint32_t l0= (a&0x03030303UL)\
751 + (b&0x03030303UL)\
752 + 0x02020202UL;\
753 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
754 + ((b&0xFCFCFCFCUL)>>2);\
755 uint32_t l1,h1;\
756\
757 pixels+=line_size;\
758 for(i=0; i<h; i+=2){\
759 uint32_t a= LD32(pixels );\
760 uint32_t b= LD32(pixels+1);\
761 l1= (a&0x03030303UL)\
762 + (b&0x03030303UL);\
763 h1= ((a&0xFCFCFCFCUL)>>2)\
764 + ((b&0xFCFCFCFCUL)>>2);\
765 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
766 pixels+=line_size;\
767 block +=line_size;\
768 a= LD32(pixels );\
769 b= LD32(pixels+1);\
770 l0= (a&0x03030303UL)\
771 + (b&0x03030303UL)\
772 + 0x02020202UL;\
773 h0= ((a&0xFCFCFCFCUL)>>2)\
774 + ((b&0xFCFCFCFCUL)>>2);\
775 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
776 pixels+=line_size;\
777 block +=line_size;\
778 }\
779 pixels+=4-line_size*(h+1);\
780 block +=4-line_size*h;\
781 }\
782}\
783\
45553457 784static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
785{\
786 int j;\
787 for(j=0; j<2; j++){\
788 int i;\
789 const uint32_t a= LD32(pixels );\
790 const uint32_t b= LD32(pixels+1);\
791 uint32_t l0= (a&0x03030303UL)\
792 + (b&0x03030303UL)\
793 + 0x01010101UL;\
794 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
795 + ((b&0xFCFCFCFCUL)>>2);\
796 uint32_t l1,h1;\
797\
798 pixels+=line_size;\
799 for(i=0; i<h; i+=2){\
800 uint32_t a= LD32(pixels );\
801 uint32_t b= LD32(pixels+1);\
802 l1= (a&0x03030303UL)\
803 + (b&0x03030303UL);\
804 h1= ((a&0xFCFCFCFCUL)>>2)\
805 + ((b&0xFCFCFCFCUL)>>2);\
806 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
807 pixels+=line_size;\
808 block +=line_size;\
809 a= LD32(pixels );\
810 b= LD32(pixels+1);\
811 l0= (a&0x03030303UL)\
812 + (b&0x03030303UL)\
813 + 0x01010101UL;\
814 h0= ((a&0xFCFCFCFCUL)>>2)\
815 + ((b&0xFCFCFCFCUL)>>2);\
816 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
817 pixels+=line_size;\
818 block +=line_size;\
819 }\
820 pixels+=4-line_size*(h+1);\
821 block +=4-line_size*h;\
822 }\
823}\
824\
45553457
ZK
825CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
826CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
827CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
828CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
829CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
830CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
831CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
832CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
b3184779 833
/* OP for the avg_* family: replaces the destination by rnd_avg32() of the
   destination and the new value (presumably a rounded per-byte average;
   rnd_avg32 is defined outside this file). */
d8085ea7 834#define op_avg(a, b) a = rnd_avg32(a, b)
59fe111e 835#endif
59fe111e
MN
/* OP for the put_* family: plain store of the new value. */
836#define op_put(a, b) a = b
837
/* Instantiate the avg_* and put_* function families, then drop the helper
   macros so they cannot leak into the code below. */
838PIXOP2(avg, op_avg)
839PIXOP2(put, op_put)
840#undef op_avg
841#undef op_put
842
de6d9b64
FB
/* Rounded average of two / four values. Every argument is parenthesized so
   that expressions such as avg2(x|y, z) are evaluated with the intended
   precedence. Arguments are still evaluated exactly once. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
845
073b013d 846
/**
 * Bilinear interpolation of a block 8 pixels wide and h rows high at a
 * 1/16-pel offset.
 *
 * Each output pixel is the weighted sum of the 2x2 source neighbourhood
 * starting at the same position, with weights that add up to 256, plus
 * rounder, shifted right by 8.
 *
 * @param dst     destination, rows are stride bytes apart
 * @param src     source, rows are stride bytes apart; 9 pixels of h+1 rows are read
 * @param stride  line size shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbours (out of 16)
 * @param y16     vertical weight of the lower neighbours (out of 16)
 * @param rounder value added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16); /* top-left weight     */
    const int w_tr = (     x16) * (16 - y16); /* top-right weight    */
    const int w_bl = (16 - x16) * (     y16); /* bottom-left weight  */
    const int w_br = (     x16) * (     y16); /* bottom-right weight */
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1]
                      + w_bl * src[stride + col] + w_br * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
/**
 * Motion compensation with a per-pixel source position, for a block 8 pixels
 * wide and h rows high.
 *
 * The source position starts at (ox, oy) and advances by (dxx, dyx) per output
 * pixel and by (dxy, dyy) per output row. The upper bits (>>16) of a position
 * hold the sample coordinate with "shift" fractional bits. Inside the picture
 * the four surrounding samples are interpolated bilinearly; when a coordinate
 * falls outside [0, width-1) or [0, height-1) it is clipped and the
 * interpolation along that axis is dropped.
 *
 * @param dst    destination block, rows are stride bytes apart
 * @param src    source picture, rows are stride bytes apart
 * @param stride line size shared by dst and src
 * @param h      number of rows to produce
 * @param ox,oy  start position for the first output row
 * @param dxx,dyx position increment per output pixel
 * @param dxy,dyy position increment per output row
 * @param shift  number of fractional bits in the coordinates
 * @param r      rounding value added before the final shift by 2*shift
 * @param width  source width used for the range checks and clipping
 * @param height source height used for the range checks and clipping
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s         = 1 << shift;
    const int frac_mask = s - 1;
    const int max_x     = width - 1;
    const int max_y     = height - 1;
    int x, y;

    for (y = 0; y < h; y++) {
        uint8_t *out = dst + y * stride;
        int vx = ox;
        int vy = oy;

        for (x = 0; x < 8; x++) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & frac_mask;
            const int frac_y = src_y & frac_mask;
            int x_inside, y_inside, index;

            src_x >>= shift;
            src_y >>= shift;

            /* the unsigned compare rejects negative coordinates as well */
            x_inside = (unsigned)src_x < max_x;
            y_inside = (unsigned)src_y < max_y;

            if (x_inside && y_inside) {
                index = src_x + src_y * stride;
                out[x] = ( ( src[index           ] * (s - frac_x)
                           + src[index          + 1] * frac_x ) * (s - frac_y)
                         + ( src[index + stride    ] * (s - frac_x)
                           + src[index + stride + 1] * frac_x ) * frac_y
                         + r) >> (shift * 2);
            } else if (x_inside) {
                /* row out of range: horizontal interpolation only */
                index = src_x + clip(src_y, 0, max_y) * stride;
                out[x] = ( ( src[index    ] * (s - frac_x)
                           + src[index + 1] * frac_x ) * s
                         + r) >> (shift * 2);
            } else if (y_inside) {
                /* column out of range: vertical interpolation only */
                index = clip(src_x, 0, max_x) + src_y * stride;
                out[x] = ( ( src[index         ] * (s - frac_y)
                           + src[index + stride] * frac_y ) * s
                         + r) >> (shift * 2);
            } else {
                /* both out of range: nearest clipped sample */
                index = clip(src_x, 0, max_x) + clip(src_y, 0, max_y) * stride;
                out[x] = src[index];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
927
/**
 * Third-pel copy with no sub-sample offset: forwards to the plain
 * put_pixelsN_c routine matching the block width.  Widths other than
 * 2, 4, 8 and 16 are silently ignored.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
936
/**
 * Third-pel interpolation, horizontal offset 1/3, no vertical offset.
 * Each output is (2*current + right + 1) scaled by 683/2048, i.e. roughly
 * a division by 3.  Reads one sample beyond the block on the right.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+1] + 1;

            dst[col] = (683*sum) >> 11;
        }
    }
}
947
/**
 * Third-pel interpolation, horizontal offset 2/3, no vertical offset.
 * Each output is (current + 2*right + 1) scaled by 683/2048, i.e. roughly
 * a division by 3.  Reads one sample beyond the block on the right.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+1] + 1;

            dst[col] = (683*sum) >> 11;
        }
    }
}
958
/**
 * Third-pel interpolation, vertical offset 1/3, no horizontal offset.
 * Each output is (2*current + below + 1) scaled by 683/2048, i.e. roughly
 * a division by 3.  Reads one row beyond the block at the bottom.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+stride] + 1;

            dst[col] = (683*sum) >> 11;
        }
    }
}
969
/**
 * Third-pel interpolation at offset (1/3, 1/3).
 * The 2x2 neighbourhood is blended with weights 4/3/3/2 (current, right,
 * below, below-right), 6 is added for rounding, and the sum is scaled by
 * 2731/32768, i.e. roughly a division by 12.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int sum = 4*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 2*src[col+stride+1] + 6;

            dst[col] = (2731*sum) >> 15;
        }
    }
}
980
/**
 * Third-pel interpolation at offset (1/3, 2/3): one third to the right,
 * two thirds down.
 *
 * The mcXY suffix follows the sibling helpers (mc10/mc20 shift along x,
 * mc01/mc02 shift along y), so X is the horizontal and Y the vertical
 * third.  At this position the sample directly below is the closest one
 * and gets the largest weight (4); the sample to the right is the farthest
 * and gets the smallest (2); the other two get 3.  The weighted sum plus a
 * rounding term of 6 is scaled by 2731/32768, i.e. roughly divided by 12.
 *
 * @param dst    output block
 * @param src    input block; one extra column and one extra row are read
 * @param stride row distance shared by dst and src
 * @param width  block width in pixels
 * @param height block height in rows
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
991
/**
 * Third-pel interpolation, vertical offset 2/3, no horizontal offset.
 * Each output is (current + 2*below + 1) scaled by 683/2048, i.e. roughly
 * a division by 3.  Reads one row beyond the block at the bottom.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+stride] + 1;

            dst[col] = (683*sum) >> 11;
        }
    }
}
1002
/**
 * Third-pel interpolation at offset (2/3, 1/3): two thirds to the right,
 * one third down.
 *
 * The mcXY suffix follows the sibling helpers (mc10/mc20 shift along x,
 * mc01/mc02 shift along y), so X is the horizontal and Y the vertical
 * third.  At this position the sample to the right is the closest one and
 * gets the largest weight (4); the sample directly below is the farthest
 * and gets the smallest (2); the other two get 3.  The weighted sum plus a
 * rounding term of 6 is scaled by 2731/32768, i.e. roughly divided by 12.
 *
 * @param dst    output block
 * @param src    input block; one extra column and one extra row are read
 * @param stride row distance shared by dst and src
 * @param width  block width in pixels
 * @param height block height in rows
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
1013
/**
 * Third-pel interpolation at offset (2/3, 2/3).
 * The 2x2 neighbourhood is blended with weights 2/3/3/4 (current, right,
 * below, below-right), 6 is added for rounding, and the sum is scaled by
 * 2731/32768, i.e. roughly a division by 12.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int sum = 2*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 4*src[col+stride+1] + 6;

            dst[col] = (2731*sum) >> 15;
        }
    }
}
/*
 * Disabled sketch: TPEL_WIDTH(width) would emit fixed-width wrappers
 * put_tpel_pixels<width>_mcXY_c(dst, src, stride, height), one per
 * third-pel position, each forwarding to the generic put_tpel_pixels_mcXY_c
 * helper with the width filled in.
 * NOTE(review): every wrapper body is spelled "void f(...);" rather than
 * "f(...);", so the leading "void" has to go before this can be enabled.
 */
1024#if 0
1025#define TPEL_WIDTH(width)\
1026static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1027 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
1028static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1029 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
1030static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1031 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
1032static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1033 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
1034static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1035 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
1036static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1037 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
1038static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1039 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
1040static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1041 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
1042static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1043 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
1044#endif
1045
0da71265
MN
1046#define H264_CHROMA_MC(OPNAME, OP)\
1047static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1048 const int A=(8-x)*(8-y);\
1049 const int B=( x)*(8-y);\
1050 const int C=(8-x)*( y);\
1051 const int D=( x)*( y);\
1052 int i;\
1053 \
1054 assert(x<8 && y<8 && x>=0 && y>=0);\
1055\
1056 for(i=0; i<h; i++)\
1057 {\
1058 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1059 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1060 dst+= stride;\
1061 src+= stride;\
1062 }\
1063}\
1064\
1065static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1066 const int A=(8-x)*(8-y);\
1067 const int B=( x)*(8-y);\
1068 const int C=(8-x)*( y);\
1069 const int D=( x)*( y);\
1070 int i;\
1071 \
1072 assert(x<8 && y<8 && x>=0 && y>=0);\
1073\
1074 for(i=0; i<h; i++)\
1075 {\
1076 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1077 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1078 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1079 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1080 dst+= stride;\
1081 src+= stride;\
1082 }\
1083}\
1084\
1085static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1086 const int A=(8-x)*(8-y);\
1087 const int B=( x)*(8-y);\
1088 const int C=(8-x)*( y);\
1089 const int D=( x)*( y);\
1090 int i;\
1091 \
1092 assert(x<8 && y<8 && x>=0 && y>=0);\
1093\
1094 for(i=0; i<h; i++)\
1095 {\
1096 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1097 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1098 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1099 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1100 OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
1101 OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
1102 OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
1103 OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
1104 dst+= stride;\
1105 src+= stride;\
1106 }\
1107}
1108
1109#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
1110#define op_put(a, b) a = (((b) + 32)>>6)
1111
1112H264_CHROMA_MC(put_ , op_put)
1113H264_CHROMA_MC(avg_ , op_avg)
1114#undef op_avg
1115#undef op_put
1116
/**
 * Copy h rows from src to dst, one LD32/ST32 transfer per row.
 * LD32 and ST32 are defined outside this block; presumably each moves one
 * 32-bit word, making this a 4-byte-wide copy.
 * dst advances by dstStride and src by srcStride after every row.
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        ST32(dst, LD32(src));
        src += srcStride;
        dst += dstStride;
    }
}
1127
/**
 * Copy h rows from src to dst, two LD32/ST32 transfers per row at byte
 * offsets 0 and 4.  LD32 and ST32 are defined outside this block;
 * presumably each moves one 32-bit word, making this an 8-byte-wide copy.
 * dst advances by dstStride and src by srcStride after every row.
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
        src += srcStride;
        dst += dstStride;
    }
}
1139
/**
 * Copy h rows from src to dst, four LD32/ST32 transfers per row at byte
 * offsets 0, 4, 8 and 12.  LD32 and ST32 are defined outside this block;
 * presumably each moves one 32-bit word, making this a 16-byte-wide copy.
 * dst advances by dstStride and src by srcStride after every row.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
        src += srcStride;
        dst += dstStride;
    }
}
073b013d 1153
/**
 * Copy h rows of 17 bytes: four LD32/ST32 transfers at byte offsets 0, 4,
 * 8 and 12 followed by a single-byte copy of element 16.  LD32 and ST32
 * are defined outside this block; presumably each moves one 32-bit word.
 * dst advances by dstStride and src by srcStride after every row.
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[16] = src[16];
        src += srcStride;
        dst += dstStride;
    }
}
1168
/**
 * Copy h rows of 9 bytes: two LD32/ST32 transfers at byte offsets 0 and 4
 * followed by a single-byte copy of element 8.  LD32 and ST32 are defined
 * outside this block; presumably each moves one 32-bit word.
 * dst advances by dstStride and src by srcStride after every row.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[8] = src[8];
        src += srcStride;
        dst += dstStride;
    }
}
1181
826f429a 1182
b3184779 1183#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda
ZK
1184static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1185 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1186 int i;\
1187 for(i=0; i<h; i++)\
1188 {\
1189 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1190 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1191 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1192 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1193 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1194 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1195 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1196 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1197 dst+=dstStride;\
1198 src+=srcStride;\
1199 }\
44eb4951
MN
1200}\
1201\
0c1a9eda 1202static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 1203 const int w=8;\
0c1a9eda 1204 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1205 int i;\
1206 for(i=0; i<w; i++)\
1207 {\
1208 const int src0= src[0*srcStride];\
1209 const int src1= src[1*srcStride];\
1210 const int src2= src[2*srcStride];\
1211 const int src3= src[3*srcStride];\
1212 const int src4= src[4*srcStride];\
1213 const int src5= src[5*srcStride];\
1214 const int src6= src[6*srcStride];\
1215 const int src7= src[7*srcStride];\
1216 const int src8= src[8*srcStride];\
1217 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1218 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1219 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1220 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1221 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1222 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1223 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1224 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1225 dst++;\
1226 src++;\
1227 }\
1228}\
1229\
0c1a9eda
ZK
1230static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1231 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1232 int i;\
826f429a 1233 \
b3184779
MN
1234 for(i=0; i<h; i++)\
1235 {\
1236 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1237 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1238 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1239 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1240 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1241 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1242 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1243 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1244 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1245 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1246 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1247 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1248 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1249 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1250 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1251 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1252 dst+=dstStride;\
1253 src+=srcStride;\
1254 }\
1255}\
1256\
0c1a9eda
ZK
1257static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1258 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1259 int i;\
826f429a 1260 const int w=16;\
b3184779
MN
1261 for(i=0; i<w; i++)\
1262 {\
1263 const int src0= src[0*srcStride];\
1264 const int src1= src[1*srcStride];\
1265 const int src2= src[2*srcStride];\
1266 const int src3= src[3*srcStride];\
1267 const int src4= src[4*srcStride];\
1268 const int src5= src[5*srcStride];\
1269 const int src6= src[6*srcStride];\
1270 const int src7= src[7*srcStride];\
1271 const int src8= src[8*srcStride];\
1272 const int src9= src[9*srcStride];\
1273 const int src10= src[10*srcStride];\
1274 const int src11= src[11*srcStride];\
1275 const int src12= src[12*srcStride];\
1276 const int src13= src[13*srcStride];\
1277 const int src14= src[14*srcStride];\
1278 const int src15= src[15*srcStride];\
1279 const int src16= src[16*srcStride];\
1280 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1281 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1282 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1283 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1284 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1285 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1286 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1287 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1288 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1289 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1290 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1291 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1292 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1293 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1294 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1295 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1296 dst++;\
1297 src++;\
1298 }\
1299}\
1300\
0c1a9eda 1301static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1302 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
1303}\
1304\
0c1a9eda
ZK
1305static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1306 uint8_t half[64];\
b3184779
MN
1307 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1308 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
1309}\
1310\
0c1a9eda 1311static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1312 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
1313}\
1314\
0c1a9eda
ZK
1315static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1316 uint8_t half[64];\
b3184779
MN
1317 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1318 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
1319}\
1320\
0c1a9eda
ZK
1321static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1322 uint8_t full[16*9];\
1323 uint8_t half[64];\
b3184779 1324 copy_block9(full, src, 16, stride, 9);\
db794953 1325 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1326 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
1327}\
1328\
0c1a9eda
ZK
1329static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1330 uint8_t full[16*9];\
b3184779 1331 copy_block9(full, src, 16, stride, 9);\
db794953 1332 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
1333}\
1334\
0c1a9eda
ZK
1335static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1336 uint8_t full[16*9];\
1337 uint8_t half[64];\
b3184779 1338 copy_block9(full, src, 16, stride, 9);\
db794953 1339 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1340 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 1341}\
0c1a9eda
ZK
1342void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1343 uint8_t full[16*9];\
1344 uint8_t halfH[72];\
1345 uint8_t halfV[64];\
1346 uint8_t halfHV[64];\
b3184779
MN
1347 copy_block9(full, src, 16, stride, 9);\
1348 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1349 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1350 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1351 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1352}\
0c1a9eda
ZK
1353static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1354 uint8_t full[16*9];\
1355 uint8_t halfH[72];\
1356 uint8_t halfHV[64];\
db794953
MN
1357 copy_block9(full, src, 16, stride, 9);\
1358 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1359 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1360 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1361 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1362}\
0c1a9eda
ZK
1363void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1364 uint8_t full[16*9];\
1365 uint8_t halfH[72];\
1366 uint8_t halfV[64];\
1367 uint8_t halfHV[64];\
b3184779
MN
1368 copy_block9(full, src, 16, stride, 9);\
1369 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1370 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1371 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1372 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1373}\
0c1a9eda
ZK
1374static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1375 uint8_t full[16*9];\
1376 uint8_t halfH[72];\
1377 uint8_t halfHV[64];\
db794953
MN
1378 copy_block9(full, src, 16, stride, 9);\
1379 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1380 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1381 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1382 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1383}\
0c1a9eda
ZK
1384void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1385 uint8_t full[16*9];\
1386 uint8_t halfH[72];\
1387 uint8_t halfV[64];\
1388 uint8_t halfHV[64];\
b3184779
MN
1389 copy_block9(full, src, 16, stride, 9);\
1390 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1391 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1392 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1393 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1394}\
0c1a9eda
ZK
1395static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1396 uint8_t full[16*9];\
1397 uint8_t halfH[72];\
1398 uint8_t halfHV[64];\
db794953
MN
1399 copy_block9(full, src, 16, stride, 9);\
1400 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1401 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1402 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1403 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1404}\
0c1a9eda
ZK
1405void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1406 uint8_t full[16*9];\
1407 uint8_t halfH[72];\
1408 uint8_t halfV[64];\
1409 uint8_t halfHV[64];\
b3184779
MN
1410 copy_block9(full, src, 16, stride, 9);\
1411 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1412 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1413 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1414 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1415}\
0c1a9eda
ZK
1416static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1417 uint8_t full[16*9];\
1418 uint8_t halfH[72];\
1419 uint8_t halfHV[64];\
db794953
MN
1420 copy_block9(full, src, 16, stride, 9);\
1421 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1422 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1423 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1424 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1425}\
0c1a9eda
ZK
1426static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1427 uint8_t halfH[72];\
1428 uint8_t halfHV[64];\
b3184779 1429 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1430 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1431 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1432}\
0c1a9eda
ZK
1433static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1434 uint8_t halfH[72];\
1435 uint8_t halfHV[64];\
b3184779 1436 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1437 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1438 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1439}\
0c1a9eda
ZK
1440void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1441 uint8_t full[16*9];\
1442 uint8_t halfH[72];\
1443 uint8_t halfV[64];\
1444 uint8_t halfHV[64];\
b3184779
MN
1445 copy_block9(full, src, 16, stride, 9);\
1446 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1447 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1448 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1449 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1450}\
0c1a9eda
ZK
1451static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1452 uint8_t full[16*9];\
1453 uint8_t halfH[72];\
db794953
MN
1454 copy_block9(full, src, 16, stride, 9);\
1455 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1456 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1457 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1458}\
0c1a9eda
ZK
1459void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1460 uint8_t full[16*9];\
1461 uint8_t halfH[72];\
1462 uint8_t halfV[64];\
1463 uint8_t halfHV[64];\
b3184779
MN
1464 copy_block9(full, src, 16, stride, 9);\
1465 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1466 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1467 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1468 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1469}\
0c1a9eda
ZK
1470static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1471 uint8_t full[16*9];\
1472 uint8_t halfH[72];\
db794953
MN
1473 copy_block9(full, src, 16, stride, 9);\
1474 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1475 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1476 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1477}\
0c1a9eda
ZK
1478static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1479 uint8_t halfH[72];\
b3184779 1480 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1481 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1482}\
0c1a9eda 1483static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1484 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1485}\
1486\
0c1a9eda
ZK
1487static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1488 uint8_t half[256];\
b3184779
MN
1489 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1490 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1491}\
1492\
0c1a9eda 1493static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1494 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1495}\
b3184779 1496\
0c1a9eda
ZK
1497static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1498 uint8_t half[256];\
b3184779
MN
1499 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1500 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1501}\
1502\
0c1a9eda
ZK
1503static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1504 uint8_t full[24*17];\
1505 uint8_t half[256];\
b3184779 1506 copy_block17(full, src, 24, stride, 17);\
826f429a 1507 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1508 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1509}\
1510\
0c1a9eda
ZK
1511static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1512 uint8_t full[24*17];\
b3184779 1513 copy_block17(full, src, 24, stride, 17);\
826f429a 1514 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1515}\
1516\
0c1a9eda
ZK
1517static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1518 uint8_t full[24*17];\
1519 uint8_t half[256];\
b3184779 1520 copy_block17(full, src, 24, stride, 17);\
826f429a 1521 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1522 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1523}\
0c1a9eda
ZK
1524void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1525 uint8_t full[24*17];\
1526 uint8_t halfH[272];\
1527 uint8_t halfV[256];\
1528 uint8_t halfHV[256];\
b3184779
MN
1529 copy_block17(full, src, 24, stride, 17);\
1530 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1531 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1532 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1533 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1534}\
0c1a9eda
ZK
1535static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1536 uint8_t full[24*17];\
1537 uint8_t halfH[272];\
1538 uint8_t halfHV[256];\
db794953
MN
1539 copy_block17(full, src, 24, stride, 17);\
1540 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1541 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1542 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1543 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1544}\
0c1a9eda
ZK
1545void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1546 uint8_t full[24*17];\
1547 uint8_t halfH[272];\
1548 uint8_t halfV[256];\
1549 uint8_t halfHV[256];\
b3184779
MN
1550 copy_block17(full, src, 24, stride, 17);\
1551 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1552 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1553 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1554 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1555}\
0c1a9eda
ZK
1556static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1557 uint8_t full[24*17];\
1558 uint8_t halfH[272];\
1559 uint8_t halfHV[256];\
db794953
MN
1560 copy_block17(full, src, 24, stride, 17);\
1561 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1562 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1563 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1564 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1565}\
0c1a9eda
ZK
1566void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1567 uint8_t full[24*17];\
1568 uint8_t halfH[272];\
1569 uint8_t halfV[256];\
1570 uint8_t halfHV[256];\
b3184779
MN
1571 copy_block17(full, src, 24, stride, 17);\
1572 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1573 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1574 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1575 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1576}\
0c1a9eda
ZK
1577static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1578 uint8_t full[24*17];\
1579 uint8_t halfH[272];\
1580 uint8_t halfHV[256];\
db794953
MN
1581 copy_block17(full, src, 24, stride, 17);\
1582 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1583 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1584 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1585 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1586}\
0c1a9eda
ZK
1587void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1588 uint8_t full[24*17];\
1589 uint8_t halfH[272];\
1590 uint8_t halfV[256];\
1591 uint8_t halfHV[256];\
b3184779
MN
1592 copy_block17(full, src, 24, stride, 17);\
1593 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1594 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1595 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1596 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1597}\
0c1a9eda
ZK
1598static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1599 uint8_t full[24*17];\
1600 uint8_t halfH[272];\
1601 uint8_t halfHV[256];\
db794953
MN
1602 copy_block17(full, src, 24, stride, 17);\
1603 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1604 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1605 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1606 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1607}\
0c1a9eda
ZK
1608static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1609 uint8_t halfH[272];\
1610 uint8_t halfHV[256];\
b3184779 1611 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1612 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1613 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1614}\
0c1a9eda
ZK
1615static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1616 uint8_t halfH[272];\
1617 uint8_t halfHV[256];\
b3184779 1618 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1619 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1620 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1621}\
0c1a9eda
ZK
1622void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1623 uint8_t full[24*17];\
1624 uint8_t halfH[272];\
1625 uint8_t halfV[256];\
1626 uint8_t halfHV[256];\
b3184779
MN
1627 copy_block17(full, src, 24, stride, 17);\
1628 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1629 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1630 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1631 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1632}\
0c1a9eda
ZK
1633static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1634 uint8_t full[24*17];\
1635 uint8_t halfH[272];\
db794953
MN
1636 copy_block17(full, src, 24, stride, 17);\
1637 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1638 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1639 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1640}\
0c1a9eda
ZK
1641void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1642 uint8_t full[24*17];\
1643 uint8_t halfH[272];\
1644 uint8_t halfV[256];\
1645 uint8_t halfHV[256];\
b3184779
MN
1646 copy_block17(full, src, 24, stride, 17);\
1647 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1648 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1649 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1650 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1651}\
0c1a9eda
ZK
1652static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1653 uint8_t full[24*17];\
1654 uint8_t halfH[272];\
db794953
MN
1655 copy_block17(full, src, 24, stride, 17);\
1656 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1657 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1658 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1659}\
0c1a9eda
ZK
1660static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1661 uint8_t halfH[272];\
b3184779 1662 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1663 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1664}
44eb4951 1665
b3184779
MN
/*
 * Store operators handed to QPEL_MC below.  `b` is an unnormalized filter
 * sum: each operator adds a bias (16, or 15 for the no_rnd variants), shifts
 * right by 5 and maps the result through `cm`, which therefore has to be in
 * scope wherever the operator is expanded.
 *   op_put / op_put_no_rnd : overwrite `a` with the mapped value.
 *   op_avg                 : a = (a + mapped + 1) >> 1
 *   op_avg_no_rnd          : a = (a + mapped) >> 1
 * NOTE(review): the role of QPEL_MC's first argument (0 / 1) is not visible
 * in this part of the file -- confirm against the macro definition.
 */
1666#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1667#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1668#define op_put(a, b) a = cm[((b) + 16)>>5]
1669#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1670
/* Instantiate the put_, put_no_rnd_ and avg_ function families. */
1671QPEL_MC(0, put_ , _ , op_put)
1672QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1673QPEL_MC(0, avg_ , _ , op_avg)
/* The avg_no_rnd family is intentionally left disabled on the next line. */
1674//QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* The operators are only meant for the expansions above; drop them again. */
1675#undef op_avg
1676#undef op_avg_no_rnd
1677#undef op_put
1678#undef op_put_no_rnd
44eb4951 1679
0da71265
MN
1680#if 1
/*
 * H264_LOWPASS(OPNAME, OP, OP2) emits the 6-tap (1,-5,20,20,-5,1) filter
 * kernels OPNAME##h264_qpel{4,8,16}_{h,v,hv}_lowpass.
 *
 *   OP(dst, sum)  stores an unnormalized single-pass filter sum.
 *   OP2(dst, sum) stores the unnormalized sum of the two-pass
 *                 (horizontal into tmp, then vertical) filter.
 *
 * The operators used in this file read a local named `cm`, so every kernel
 * has to keep a table pointer under exactly that name.
 *
 * The 4 and 8 wide entry points forward to size-generic inline helpers
 * (n must not exceed 8, the tap buffers hold 8+5 entries); the 16 wide entry
 * points tile four 8x8 calls.  The vertical passes load a whole column of
 * taps before storing anything, exactly as the unrolled form did.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
/* horizontal pass over an n x n block; reads src[-2] .. src[n+2] per row */\
static inline void OPNAME ## h264_qpel_h_lowpass_n(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int n){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(y=0; y<n; y++){\
        for(x=0; x<n; x++){\
            OP(dst[x], (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]));\
        }\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* vertical pass over an n x n block; reads rows -2 .. n+2 of each column */\
static inline void OPNAME ## h264_qpel_v_lowpass_n(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int n){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, k;\
    for(x=0; x<n; x++){\
        int tap[8+5];\
        for(k=0; k<n+5; k++)\
            tap[k]= src[(k-2)*srcStride];\
        for(k=0; k<n; k++){\
            OP(dst[k*dstStride], (tap[k+2]+tap[k+3])*20 - (tap[k+1]+tap[k+4])*5 + (tap[k]+tap[k+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
/* two-pass filter: n+5 horizontally filtered rows go to tmp, then a vertical pass over tmp */\
static inline void OPNAME ## h264_qpel_hv_lowpass_n(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int n){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y, k;\
    src -= 2*srcStride;\
    for(y=0; y<n+5; y++){\
        for(x=0; x<n; x++)\
            tmp[x]= (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    /* rewind tmp so that it points at its third row (index 2) */\
    tmp -= tmpStride*(n+5-2);\
    for(x=0; x<n; x++){\
        int tap[8+5];\
        for(k=0; k<n+5; k++)\
            tap[k]= tmp[(k-2)*tmpStride];\
        for(k=0; k<n; k++){\
            OP2(dst[k*dstStride], (tap[k+2]+tap[k+3])*20 - (tap[k+1]+tap[k+4])*5 + (tap[k]+tap[k+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass_n(dst, src, dstStride, srcStride, 4);\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass_n(dst, src, dstStride, srcStride, 4);\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass_n(dst, tmp, src, dstStride, tmpStride, srcStride, 4);\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass_n(dst, src, dstStride, srcStride, 8);\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass_n(dst, src, dstStride, srcStride, 8);\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass_n(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
}\
\
/* 16x16 variants: top-left, top-right, bottom-left, bottom-right 8x8 tiles */\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst                , src                , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8              , src+8              , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8*dstStride    , src+8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8*dstStride + 8, src+8*srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst                , src                , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8              , src+8              , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8*dstStride    , src+8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8*dstStride + 8, src+8*srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst                , tmp                , src                , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8              , tmp+8              , src+8              , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8*dstStride    , tmp+8*tmpStride    , src+8*srcStride    , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8*dstStride + 8, tmp+8*tmpStride + 8, src+8*srcStride + 8, dstStride, tmpStride, srcStride);\
}

/*
 * H264_MC(OPNAME, SIZE) emits OPNAME##h264_qpel<SIZE>_mcXY_c(dst, src, stride)
 * for X, Y in 0..3.  Each function builds up to two SIZE x SIZE intermediate
 * planes with the put_ low-pass kernels and hands them to the OPNAME pixel
 * routines:
 *   hbuf  - horizontally filtered plane (from src, or from src + stride)
 *   vbuf  - vertically filtered plane, computed from a packed copy of the
 *           SIZE+5 source rows starting two rows above src (column 0 or 1)
 *   hvbuf - plane filtered in both directions (needs the int16_t scratch)
 */
#define H264_MC(OPNAME, SIZE) \
/* mc00: forwards to the SIZE-wide pixel routine, no filtering */\
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
/* mc10: src combined with the horizontally filtered plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, hbuf, stride, stride, SIZE, SIZE);\
}\
\
/* mc20: horizontally filtered plane written straight to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
/* mc30: src+1 combined with the horizontally filtered plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, hbuf, stride, stride, SIZE, SIZE);\
}\
\
/* mc01: packed source rows combined with the vertically filtered plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, rows_mid, vbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc02: vertically filtered plane written straight to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, rows_mid, stride, SIZE);\
}\
\
/* mc03: like mc01 but the unfiltered operand starts one row lower */\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, rows_mid+SIZE, vbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc11: h plane from src, v plane from column 0 */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hbuf, vbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc31: h plane from src, v plane from column 1 */\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hbuf, vbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc13: h plane from src + stride, v plane from column 0 */\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hbuf, vbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc33: h plane from src + stride, v plane from column 1 */\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hbuf, vbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc22: two-pass filtered plane written straight to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, scratch, src, stride, SIZE, stride);\
}\
\
/* mc21: h plane from src combined with the hv plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    uint8_t hvbuf[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvbuf, scratch, src, SIZE, SIZE, stride);\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hbuf, hvbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc23: h plane from src + stride combined with the hv plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hbuf[SIZE*SIZE];\
    uint8_t hvbuf[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvbuf, scratch, src, SIZE, SIZE, stride);\
    put_h264_qpel ## SIZE ## _h_lowpass(hbuf, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hbuf, hvbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc12: v plane from column 0 combined with the hv plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t hvbuf[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvbuf, scratch, src, SIZE, SIZE, stride);\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vbuf, hvbuf, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc32: v plane from column 1 combined with the hv plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vbuf[SIZE*SIZE];\
    uint8_t hvbuf[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvbuf, scratch, src, SIZE, SIZE, stride);\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vbuf, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vbuf, hvbuf, stride, SIZE, SIZE, SIZE);\
}

/*
 * Store operators for the H264_LOWPASS / H264_MC expansions below.
 * `b` is an unnormalized filter sum and `cm` must be in scope at the point of
 * expansion (H264_LOWPASS declares it in every kernel).
 *   op_put  / op_avg  : (b + 16) >> 5, used for the single-pass sums, whose
 *                       taps 1,-5,20,20,-5,1 add up to 32.
 *   op2_put / op2_avg : (b + 512) >> 10, used for the two-pass sums.
 * The *_avg forms combine the mapped value with the current destination as
 * (a + v + 1) >> 1; the *_put forms overwrite it.
 */
2021#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
/* Disabled variant using w1, w2 and o, none of which are defined in the visible part of this file. */
2022//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2023#define op_put(a, b) a = cm[((b) + 16)>>5]
2024#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2025#define op2_put(a, b) a = cm[((b) + 512)>>10]
2026
/* Filter kernels first (H264_MC calls the put_ kernels), then the mcXY entry points for 4, 8 and 16 wide blocks. */
2027H264_LOWPASS(put_ , op_put, op2_put)
2028H264_LOWPASS(avg_ , op_avg, op2_avg)
2029H264_MC(put_, 4)
2030H264_MC(put_, 8)
2031H264_MC(put_, 16)
2032H264_MC(avg_, 4)
2033H264_MC(avg_, 8)
2034H264_MC(avg_, 16)
2035
/* The operators are local to the expansions above. */
2036#undef op_avg
2037#undef op_put
2038#undef op2_avg
2039#undef op2_put
2040#endif
2041
1457ab52
MN
2042static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2043 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2044 int i;
2045
2046 for(i=0; i<h; i++){
2047 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2048 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2049 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2050 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2051 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2052 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2053 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2054 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2055 dst+=dstStride;
2056 src+=srcStride;
2057 }
2058}
2059
2060static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2061 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2062 int i;
2063
2064 for(i=0; i<w; i++){
2065 const int src_1= src[ -srcStride];
2066 const int src0 = src[0 ];
2067 const int src1 = src[ srcStride];
2068 const int src2 = src[2*srcStride];
2069 const int src3 = src[3*srcStride];
2070 const int src4 = src[4*srcStride];
2071 const int src5 = src[5*srcStride];
2072 const int src6 = src[6*srcStride];
2073 const int src7 = src[7*srcStride];
2074 const int src8 = src[8*srcStride];
2075 const int src9 = src[9*srcStride];
2076 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2077 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2078 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2079 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2080 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2081 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2082 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2083 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2084 src++;
2085 dst++;
2086 }
2087}
2088
/** mspel position 00: forwards to put_pixels8_c for 8 rows, no filtering. */
static void put_mspel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 8;

    put_pixels8_c(dst, src, stride, rows);
}
2092
/** mspel position 10: src combined (put_pixels8_l2) with its horizontally filtered 8x8 plane. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8 * 8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
2098
/** mspel position 20: horizontally filtered 8x8 block written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 8;

    wmv2_mspel8_h_lowpass(dst, src, stride, stride, rows);
}
2102
/** mspel position 30: src+1 combined (put_pixels8_l2) with the horizontally filtered 8x8 plane of src. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8 * 8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src + 1, filtered, stride, stride, 8, 8);
}
2108
/** mspel position 02: vertically filtered 8x8 block written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int columns = 8;

    wmv2_mspel8_v_lowpass(dst, src, stride, stride, columns);
}
2112
/**
 * mspel position 12: combines the vertically filtered plane of src with the
 * plane filtered in both directions.
 *
 * The horizontal pass covers 11 rows starting one row above src, because the
 * following vertical pass reads rows -1 .. 9 relative to its start.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8 * 11];
    uint8_t v_plane[8 * 8];
    uint8_t hv_plane[8 * 8];

    wmv2_mspel8_v_lowpass(v_plane, src, 8, stride, 8);
    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hv_plane, h_rows + 8, 8, 8, 8);
    put_pixels8_l2(dst, v_plane, hv_plane, stride, 8, 8, 8);
}
/**
 * mspel position 32: like position 12, but the vertically filtered plane is
 * taken from src+1 instead of src.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8 * 11];
    uint8_t v_plane[8 * 8];
    uint8_t hv_plane[8 * 8];

    wmv2_mspel8_v_lowpass(v_plane, src + 1, 8, stride, 8);
    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hv_plane, h_rows + 8, 8, 8, 8);
    put_pixels8_l2(dst, v_plane, hv_plane, stride, 8, 8, 8);
}
/**
 * mspel position 22: horizontal pass into an 11-row scratch buffer (starting
 * one row above src), then vertical pass from its second row into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8 * 11];
    uint8_t *const first_row = h_rows + 8;

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, first_row, stride, 8, 8);
}
2136
2137
/**
 * Sum of absolute differences between two 16x16 pixel blocks.
 *
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, applied to both blocks
 * @return sum over all 256 positions of |pix1 - pix2|
 */
static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2165
/**
 * 16x16 sum of absolute differences against pix2 combined with its right
 * neighbour: every pix1 sample is compared with avg2(pix2[x], pix2[x+1]).
 * Reads 17 bytes per row of pix2.
 */
static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2193
/**
 * 16x16 sum of absolute differences against pix2 combined with the row below
 * it: every pix1 sample is compared with avg2(pix2[x], pix2[x + line_size]).
 * Reads 17 rows of pix2.
 */
static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return total;
}
2223
/**
 * 16x16 sum of absolute differences against the avg4() of each 2x2
 * neighbourhood of pix2 (sample, right neighbour, and the two samples below).
 * Reads 17 bytes from each of 17 rows of pix2.
 */
static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return total;
}
2253
/**
 * Sum of absolute differences between two 8x8 pixel blocks.
 *
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, applied to both blocks
 * @return sum over all 64 positions of |pix1 - pix2|
 */
static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2273
/**
 * 8x8 sum of absolute differences against pix2 combined with its right
 * neighbour: every pix1 sample is compared with avg2(pix2[x], pix2[x+1]).
 * Reads 9 bytes per row of pix2.
 */
static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2293
/**
 * 8x8 sum of absolute differences against pix2 combined with the row below
 * it: every pix1 sample is compared with avg2(pix2[x], pix2[x + line_size]).
 * Reads 9 rows of pix2.
 */
static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return total;
}
2315
/**
 * 8x8 sum of absolute differences against the avg4() of each 2x2
 * neighbourhood of pix2 (sample, right neighbour, and the two samples below).
 * Reads 9 bytes from each of 9 rows of pix2.
 */
static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return total;
}
2337
1457ab52
MN
/**
 * 16x16 sum of absolute differences with a leading context argument.
 * The context pointer s is not used; the work is done by pix_abs16x16_c.
 */
static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    const int sad = pix_abs16x16_c(a, b, stride);

    return sad;
}
2341
/**
 * 8x8 sum of absolute differences with a leading context argument.
 * The context pointer s is not used; the work is done by pix_abs8x8_c.
 */
static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride)
{
    const int sad = pix_abs8x8_c(a, b, stride);

    return sad;
}
2345
a9badb51
MN
2346/**
2347 * permutes an 8x8 block.
2a5700de 2348 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
2349 * @param permutation the permutation vector
2350 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
2a5700de
MN
2351 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2352 * (inverse) permutated to scantable order!
a9badb51 2353 */
0c1a9eda 2354void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 2355{
7801d21d 2356 int i;
477ab036 2357 DCTELEM temp[64];
7801d21d
MN
2358
2359 if(last<=0) return;
9a7b310d 2360 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 2361
7801d21d
MN
2362 for(i=0; i<=last; i++){
2363 const int j= scantable[i];
2364 temp[j]= block[j];
2365 block[j]=0;
2366 }
2367
2368 for(i=0; i<=last; i++){
2369 const int j= scantable[i];
2370 const int perm_j= permutation[j];
2371 block[perm_j]= temp[j];
2372 }
d962f6fd 2373}
e0eac44e 2374
2a5700de
MN
2375/**
2376 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
2377 */
eb4b3dd3 2378static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
2379{
2380 memset(blocks, 0, sizeof(DCTELEM)*6*64);
2381}
2382
/**
 * Adds src to dst byte by byte: dst[i] += src[i] for 0 <= i < w.
 * The sums wrap modulo 256 because dst is uint8_t.
 *
 * @param dst buffer that is updated in place
 * @param src bytes to add
 * @param w   number of bytes to process; nothing is done when w <= 0
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i;
    /* main loop, manually unrolled by 8 */
    for(i=0; i+7<w; i+=8){
        dst[i+0] += src[i+0];
        dst[i+1] += src[i+1];
        dst[i+2] += src[i+2];
        dst[i+3] += src[i+3];
        dst[i+4] += src[i+4];
        dst[i+5] += src[i+5];
        dst[i+6] += src[i+6];
        dst[i+7] += src[i+7];
    }
    /* remaining (w % 8) bytes */
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
2398
/**
 * Byte-wise difference: dst[i] = src1[i] - src2[i] for 0 <= i < w.
 * The result wraps modulo 256 because dst is uint8_t.
 *
 * @param dst  output buffer
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes to process; nothing is done when w <= 0
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
    /* main loop, manually unrolled by 8 */
    for(i=0; i+7<w; i+=8){
        dst[i+0] = src1[i+0]-src2[i+0];
        dst[i+1] = src1[i+1]-src2[i+1];
        dst[i+2] = src1[i+2]-src2[i+2];
        dst[i+3] = src1[i+3]-src2[i+3];
        dst[i+4] = src1[i+4]-src2[i+4];
        dst[i+5] = src1[i+5]-src2[i+5];
        dst[i+6] = src1[i+6]-src2[i+6];
        dst[i+7] = src1[i+7]-src2[i+7];
    }
    /* remaining (w % 8) bytes */
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
2414
/**
 * Butterfly helpers.
 * All three are meant to be used as statements/expressions on int operands;
 * BUTTERFLY1 and BUTTERFLY2 must be followed by a semicolon.
 */

/* o1 = i1+i2, o2 = i1-i2; i1 and i2 are each evaluated twice */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
} while (0)

/* in-place butterfly: (x,y) becomes (x+y, x-y) */
#define BUTTERFLY1(x,y) \
do {\
    const int bfly_a_= (x);\
    const int bfly_b_= (y);\
    (x)= bfly_a_+bfly_b_;\
    (y)= bfly_a_-bfly_b_;\
} while (0)

/* |x+y| + |x-y|: a final butterfly stage folded into the absolute sum */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
2429
/**
 * Compare function: runs an 8x8 butterfly (Hadamard-style) transform on
 * the difference src - dst and returns the sum of the absolute values of
 * the resulting coefficients.
 *
 * @param s      context pointer, unused here
 * @param dst    first 8x8 block
 * @param src    second 8x8 block
 * @param stride distance in bytes between two lines (used for both blocks)
 * @return sum over all 64 transformed coefficients of their absolute value
 */
static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
    int i;
    int temp[64];
    int sum=0;

    /* horizontal pass: three butterfly stages on every row of src - dst */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass: two stages in place, the third is folded into BUTTERFLYA */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
2479
/**
 * Runs an 8x8 butterfly (Hadamard-style) transform on src with the value
 * mean subtracted from every sample, and returns the sum of the absolute
 * values of the resulting coefficients.
 *
 * @param src    top-left of the 8x8 block
 * @param stride distance in bytes between two lines
 * @param mean   value subtracted from every sample before the transform
 * @return sum over all 64 transformed coefficients of their absolute value
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int i;
    int temp[64];
    int sum=0;
//FIXME OOOPS ignore 0 term instead of mean mess
    /* horizontal pass: three butterfly stages on every row of src - mean */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass: two stages in place, the third is folded into BUTTERFLYA */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    return sum;
}
2523
2524static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2525 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
2526 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2527 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1457ab52
MN
2528 int sum=0, i;
2529
2530 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 2531 s->dsp.fdct(temp);
1457ab52
MN
2532
2533 for(i=0; i<64; i++)
2534 sum+= ABS(temp[i]);
2535
2536 return sum;
2537}
2538
0e15384d 2539void simple_idct(DCTELEM *block); //FIXME
1457ab52
MN
2540
2541static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2542 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
2543 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
2544 DCTELEM * const temp= (DCTELEM*)aligned_temp;
2545 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
1457ab52
MN
2546 int sum=0, i;
2547
2548 s->mb_intra=0;
2549
2550 s->dsp.diff_pixels(temp, src1, src2, stride);
2551
2552 memcpy(bak, temp, 64*sizeof(DCTELEM));
2553
67725183 2554 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1457ab52
MN
2555 s->dct_unquantize(s, temp, 0, s->qscale);
2556 simple_idct(temp); //FIXME
2557
2558 for(i=0; i<64; i++)
2559 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2560
2561 return sum;
2562}
2563
3a87ac94
MN
2564static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2565 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2566 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
2567 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2568 uint64_t __align8 aligned_bak[stride];
2569 DCTELEM * const temp= (DCTELEM*)aligned_temp;
2570 uint8_t * const bak= (uint8_t*)aligned_bak;
3a87ac94
MN
2571 int i, last, run, bits, level, distoration, start_i;
2572 const int esc_length= s->ac_esc_length;
2573 uint8_t * length;
2574 uint8_t * last_length;
67725183
MN
2575
2576 for(i=0; i<8; i++){
2577 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
2578 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
2579 }
3a87ac94 2580
67725183
MN
2581 s->dsp.diff_pixels(temp, src1, src2, stride);
2582
2583 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2584
2585 bits=0;
3a87ac94
MN
2586
2587 if (s->mb_intra) {
67725183 2588 start_i = 1;
3a87ac94
MN
2589 length = s->intra_ac_vlc_length;
2590 last_length= s->intra_ac_vlc_last_length;
67725183 2591 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2592 } else {
2593 start_i = 0;
2594 length = s->inter_ac_vlc_length;
2595 last_length= s->inter_ac_vlc_last_length;
2596 }
3a87ac94 2597
67725183 2598 if(last>=start_i){
3a87ac94
MN
2599 run=0;
2600 for(i=start_i; i<last; i++){
2601 int j= scantable[i];
2602 level= temp[j];
2603
2604 if(level){
2605 level+=64;
2606 if((level&(~127)) == 0){
2607 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2608 }else
2609 bits+= esc_length;
2610 run=0;
2611 }else
2612 run++;
2613 }
2614 i= scantable[last];
1d0eab1d 2615
3a87ac94 2616 level= temp[i] + 64;
1d0eab1d
MN
2617
2618 assert(level - 64);
2619
3a87ac94
MN
2620 if((level&(~127)) == 0){
2621 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2622 }else
2623 bits+= esc_length;
2624
67725183
MN
2625 }
2626
2627 if(last>=0){
3a87ac94
MN
2628 s->dct_unquantize(s, temp, 0, s->qscale);
2629 }
2630
b0368839 2631 s->dsp.idct_add(bak, stride, temp);
3a87ac94
MN
2632
2633 distoration= s->dsp.sse[1](NULL, bak, src1, stride);
2634
67725183 2635 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
2636}
2637
2638static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
2639 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2640 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
2641 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2642 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3a87ac94
MN
2643 int i, last, run, bits, level, start_i;
2644 const int esc_length= s->ac_esc_length;
2645 uint8_t * length;
2646 uint8_t * last_length;
67725183
MN
2647
2648 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 2649
67725183
MN
2650 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2651
2652 bits=0;
3a87ac94
MN
2653
2654 if (s->mb_intra) {
67725183 2655 start_i = 1;
3a87ac94
MN
2656 length = s->intra_ac_vlc_length;
2657 last_length= s->intra_ac_vlc_last_length;
67725183 2658 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2659 } else {
2660 start_i = 0;
2661 length = s->inter_ac_vlc_length;
2662 last_length= s->inter_ac_vlc_last_length;
2663 }
3a87ac94 2664
67725183 2665 if(last>=start_i){
3a87ac94
MN
2666 run=0;
2667 for(i=start_i; i<last; i++){
2668 int j= scantable[i];
2669 level= temp[j];
2670
2671 if(level){
2672 level+=64;
2673 if((level&(~127)) == 0){
2674 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2675 }else
2676 bits+= esc_length;
2677 run=0;
2678 }else
2679 run++;
2680 }
2681 i= scantable[last];
67725183
MN
2682
2683 level= temp[i] + 64;
3a87ac94 2684
67725183 2685 assert(level - 64);
3a87ac94 2686
3a87ac94
MN
2687 if((level&(~127)) == 0){
2688 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2689 }else
2690 bits+= esc_length;
2691 }
2692
2693 return bits;
2694}
2695
2696
1457ab52
MN
2697WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
2698WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
2699WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
3a87ac94
MN
2700WARPER88_1616(rd8x8_c, rd16x16_c)
2701WARPER88_1616(bit8x8_c, bit16x16_c)
1457ab52 2702
b0368839
MN
2703/* XXX: those functions should be suppressed ASAP when all IDCTs are
2704 converted */
2705static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2706{
2707 j_rev_dct (block);
2708 put_pixels_clamped_c(block, dest, line_size);
2709}
2710static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2711{
2712 j_rev_dct (block);
2713 add_pixels_clamped_c(block, dest, line_size);
2714}
2715
59cf08ce
FB
2716/* init static data */
2717void dsputil_static_init(void)
e0eac44e 2718{
d2975f8d 2719 int i;
e0eac44e 2720
59cf08ce
FB
2721 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
2722 for(i=0;i<MAX_NEG_CROP;i++) {
2723 cropTbl[i] = 0;
2724 cropTbl[i + MAX_NEG_CROP + 256] = 255;
2725 }
2726
2727 for(i=0;i<512;i++) {
2728 squareTbl[i] = (i - 256) * (i - 256);
2729 }
2730
2731 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2732}
92ddb692 2733
92ddb692 2734
59cf08ce
FB
2735void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2736{
2737 int i;
de6d9b64 2738
b0368839
MN
2739#ifdef CONFIG_ENCODERS
2740 if(avctx->dct_algo==FF_DCT_FASTINT)
2741 c->fdct = fdct_ifast;
2742 else
2743 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
2744#endif //CONFIG_ENCODERS
2745
2746 if(avctx->idct_algo==FF_IDCT_INT){
2747 c->idct_put= ff_jref_idct_put;
2748 c->idct_add= ff_jref_idct_add;
2749 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2750 }else{ //accurate/default
2751 c->idct_put= simple_idct_put;
2752 c->idct_add= simple_idct_add;
2753 c->idct_permutation_type= FF_NO_IDCT_PERM;
2754 }
2755
eb4b3dd3
ZK
2756 c->get_pixels = get_pixels_c;
2757 c->diff_pixels = diff_pixels_c;
2758 c->put_pixels_clamped = put_pixels_clamped_c;
2759 c->add_pixels_clamped = add_pixels_clamped_c;
2760 c->gmc1 = gmc1_c;
2761 c->gmc = gmc_c;
2762 c->clear_blocks = clear_blocks_c;
2763 c->pix_sum = pix_sum_c;
2764 c->pix_norm1 = pix_norm1_c;
1457ab52
MN
2765 c->sse[0]= sse16_c;
2766 c->sse[1]= sse8_c;
eb4b3dd3 2767
45553457 2768 /* TODO [0] 16 [1] 8 */
eb4b3dd3
ZK
2769 c->pix_abs16x16 = pix_abs16x16_c;
2770 c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
2771 c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
2772 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
2773 c->pix_abs8x8 = pix_abs8x8_c;
2774 c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
2775 c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
2776 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
2777
45553457
ZK
2778#define dspfunc(PFX, IDX, NUM) \
2779 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
2780 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
2781 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
2782 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
2783
2784 dspfunc(put, 0, 16);
2785 dspfunc(put_no_rnd, 0, 16);
2786 dspfunc(put, 1, 8);
2787 dspfunc(put_no_rnd, 1, 8);
669ac79c
MN
2788 dspfunc(put, 2, 4);
2789 dspfunc(put, 3, 2);
45553457
ZK
2790
2791 dspfunc(avg, 0, 16);
2792 dspfunc(avg_no_rnd, 0, 16);
2793 dspfunc(avg, 1, 8);
2794 dspfunc(avg_no_rnd, 1, 8);
2795#undef dspfunc
2796
669ac79c
MN
2797 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2798 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2799 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2800 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2801 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2802 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2803 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2804 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2805 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2806
45553457
ZK
2807#define dspfunc(PFX, IDX, NUM) \
2808 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2809 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2810 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2811 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2812 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2813 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2814 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2815 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2816 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2817 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2818 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2819 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2820 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2821 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2822 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2823 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2824
2825 dspfunc(put_qpel, 0, 16);
2826 dspfunc(put_no_rnd_qpel, 0, 16);
2827
2828 dspfunc(avg_qpel, 0, 16);
2829 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2830
2831 dspfunc(put_qpel, 1, 8);
2832 dspfunc(put_no_rnd_qpel, 1, 8);
2833
2834 dspfunc(avg_qpel, 1, 8);
2835 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265
MN
2836
2837 dspfunc(put_h264_qpel, 0, 16);
2838 dspfunc(put_h264_qpel, 1, 8);
2839 dspfunc(put_h264_qpel, 2, 4);
2840 dspfunc(avg_h264_qpel, 0, 16);
2841 dspfunc(avg_h264_qpel, 1, 8);
2842 dspfunc(avg_h264_qpel, 2, 4);
2843
45553457 2844#undef dspfunc
0da71265
MN
2845 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
2846 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
2847 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
2848 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
2849 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
2850 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
c9a2ebc4 2851
1457ab52
MN
2852 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
2853 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2854 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2855 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2856 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2857 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2858 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2859 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
669ac79c 2860
1457ab52
MN
2861 c->hadamard8_diff[0]= hadamard8_diff16_c;
2862 c->hadamard8_diff[1]= hadamard8_diff_c;
2863 c->hadamard8_abs = hadamard8_abs_c;
2864
2865 c->dct_sad[0]= dct_sad16x16_c;
2866 c->dct_sad[1]= dct_sad8x8_c;
2867
2868 c->sad[0]= sad16x16_c;
2869 c->sad[1]= sad8x8_c;
2870
2871 c->quant_psnr[0]= quant_psnr16x16_c;
2872 c->quant_psnr[1]= quant_psnr8x8_c;
3a87ac94
MN
2873
2874 c->rd[0]= rd16x16_c;
2875 c->rd[1]= rd8x8_c;
2876
2877 c->bit[0]= bit16x16_c;
2878 c->bit[1]= bit8x8_c;
2879
11f18faf
MN
2880 c->add_bytes= add_bytes_c;
2881 c->diff_bytes= diff_bytes_c;
2882
980fc7b8 2883#ifdef HAVE_MMX
b0368839 2884 dsputil_init_mmx(c, avctx);
de6d9b64 2885#endif
3d03c0a2 2886#ifdef ARCH_ARMV4L
b0368839 2887 dsputil_init_armv4l(c, avctx);
3d03c0a2 2888#endif
c34270f5 2889#ifdef HAVE_MLIB
b0368839 2890 dsputil_init_mlib(c, avctx);
c34270f5 2891#endif
1e98dffb 2892#ifdef ARCH_ALPHA
b0368839 2893 dsputil_init_alpha(c, avctx);
1e98dffb 2894#endif
59925ef2 2895#ifdef ARCH_POWERPC
b0368839 2896 dsputil_init_ppc(c, avctx);
a43bd1d7 2897#endif
d46aba26 2898#ifdef HAVE_MMI
b0368839 2899 dsputil_init_mmi(c, avctx);
d46aba26 2900#endif
0c6bd2ea
B
2901#ifdef ARCH_SH4
2902 dsputil_init_sh4(c,avctx);
2903#endif
43f1708f 2904
b0368839
MN
2905 switch(c->idct_permutation_type){
2906 case FF_NO_IDCT_PERM:
2907 for(i=0; i<64; i++)
2908 c->idct_permutation[i]= i;
2909 break;
2910 case FF_LIBMPEG2_IDCT_PERM:
2911 for(i=0; i<64; i++)
2912 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
2913 break;
2914 case FF_SIMPLE_IDCT_PERM:
2915 for(i=0; i<64; i++)
2916 c->idct_permutation[i]= simple_mmx_permutation[i];
2917 break;
2918 case FF_TRANSPOSE_IDCT_PERM:
2919 for(i=0; i<64; i++)
2920 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
2921 break;
2922 default:
2923 fprintf(stderr, "Internal error, IDCT permutation not set\n");
2924 }
57060b1e 2925}
b0368839 2926