fixed wmv2 slices
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
ff4ec49e 3 * Copyright (c) 2000, 2001 Fabrice Bellard.
de6d9b64 4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
de6d9b64 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
de6d9b64 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
de6d9b64 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7ff037e9 18 *
59fe111e 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
de6d9b64 20 */
de6d9b64
FB
21#include "avcodec.h"
22#include "dsputil.h"
1457ab52 23#include "mpegvideo.h"
45553457 24
5596c60c
MN
25int ff_bit_exact=0;
26
0cfa9713 27UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
28UINT32 squareTbl[512];
29
2ad1516a
MN
30const UINT8 ff_zigzag_direct[64] = {
31 0, 1, 8, 16, 9, 2, 3, 10,
32 17, 24, 32, 25, 18, 11, 4, 5,
e0eac44e 33 12, 19, 26, 33, 40, 48, 41, 34,
2ad1516a 34 27, 20, 13, 6, 7, 14, 21, 28,
e0eac44e
FB
35 35, 42, 49, 56, 57, 50, 43, 36,
36 29, 22, 15, 23, 30, 37, 44, 51,
37 58, 59, 52, 45, 38, 31, 39, 46,
38 53, 60, 61, 54, 47, 55, 62, 63
39};
40
2f349de2
MN
41/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
42UINT16 __align8 inv_zigzag_direct16[64];
43
2ad1516a
MN
44const UINT8 ff_alternate_horizontal_scan[64] = {
45 0, 1, 2, 3, 8, 9, 16, 17,
e0eac44e
FB
46 10, 11, 4, 5, 6, 7, 15, 14,
47 13, 12, 19, 18, 24, 25, 32, 33,
48 26, 27, 20, 21, 22, 23, 28, 29,
49 30, 31, 34, 35, 40, 41, 48, 49,
50 42, 43, 36, 37, 38, 39, 44, 45,
51 46, 47, 50, 51, 56, 57, 58, 59,
52 52, 53, 54, 55, 60, 61, 62, 63,
53};
54
2ad1516a
MN
55const UINT8 ff_alternate_vertical_scan[64] = {
56 0, 8, 16, 24, 1, 9, 2, 10,
e0eac44e
FB
57 17, 25, 32, 40, 48, 56, 57, 49,
58 41, 33, 26, 18, 3, 11, 4, 12,
59 19, 27, 34, 42, 50, 58, 35, 43,
60 51, 59, 20, 28, 5, 13, 6, 14,
61 21, 29, 36, 44, 52, 60, 37, 45,
62 53, 61, 22, 30, 7, 15, 23, 31,
63 38, 46, 54, 62, 39, 47, 55, 63,
64};
65
2f349de2 66/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
eb4b3dd3 67const UINT32 inverse[256]={
2f349de2
MN
68 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
69 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
70 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
71 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
72 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
73 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
74 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
75 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
76 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
77 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
78 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
79 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
80 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
81 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
82 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
83 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
84 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
85 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
86 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
87 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
88 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
89 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
90 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
91 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
92 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
93 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
94 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
95 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
96 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
97 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
98 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
99 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
100};
101
eb4b3dd3 102static int pix_sum_c(UINT8 * pix, int line_size)
3aa102be
MN
103{
104 int s, i, j;
105
106 s = 0;
107 for (i = 0; i < 16; i++) {
108 for (j = 0; j < 16; j += 8) {
109 s += pix[0];
110 s += pix[1];
111 s += pix[2];
112 s += pix[3];
113 s += pix[4];
114 s += pix[5];
115 s += pix[6];
116 s += pix[7];
117 pix += 8;
118 }
119 pix += line_size - 16;
120 }
121 return s;
122}
123
eb4b3dd3 124static int pix_norm1_c(UINT8 * pix, int line_size)
3aa102be
MN
125{
126 int s, i, j;
127 UINT32 *sq = squareTbl + 256;
128
129 s = 0;
130 for (i = 0; i < 16; i++) {
131 for (j = 0; j < 16; j += 8) {
132 s += sq[pix[0]];
133 s += sq[pix[1]];
134 s += sq[pix[2]];
135 s += sq[pix[3]];
136 s += sq[pix[4]];
137 s += sq[pix[5]];
138 s += sq[pix[6]];
139 s += sq[pix[7]];
140 pix += 8;
141 }
142 pix += line_size - 16;
143 }
144 return s;
145}
146
147
1457ab52
MN
148static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
149{
150 int s, i;
151 UINT32 *sq = squareTbl + 256;
152
153 s = 0;
154 for (i = 0; i < 8; i++) {
155 s += sq[pix1[0] - pix2[0]];
156 s += sq[pix1[1] - pix2[1]];
157 s += sq[pix1[2] - pix2[2]];
158 s += sq[pix1[3] - pix2[3]];
159 s += sq[pix1[4] - pix2[4]];
160 s += sq[pix1[5] - pix2[5]];
161 s += sq[pix1[6] - pix2[6]];
162 s += sq[pix1[7] - pix2[7]];
163 pix1 += line_size;
164 pix2 += line_size;
165 }
166 return s;
167}
168
169static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
9c76bd48
BF
170{
171 int s, i, j;
172 UINT32 *sq = squareTbl + 256;
173
174 s = 0;
175 for (i = 0; i < 16; i++) {
176 for (j = 0; j < 16; j += 8) {
177 s += sq[pix1[0] - pix2[0]];
178 s += sq[pix1[1] - pix2[1]];
179 s += sq[pix1[2] - pix2[2]];
180 s += sq[pix1[3] - pix2[3]];
181 s += sq[pix1[4] - pix2[4]];
182 s += sq[pix1[5] - pix2[5]];
183 s += sq[pix1[6] - pix2[6]];
184 s += sq[pix1[7] - pix2[7]];
185 pix1 += 8;
186 pix2 += 8;
187 }
188 pix1 += line_size - 16;
189 pix2 += line_size - 16;
190 }
191 return s;
192}
193
eb4b3dd3 194static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
de6d9b64 195{
de6d9b64
FB
196 int i;
197
198 /* read the pixels */
de6d9b64 199 for(i=0;i<8;i++) {
c13e1abd
FH
200 block[0] = pixels[0];
201 block[1] = pixels[1];
202 block[2] = pixels[2];
203 block[3] = pixels[3];
204 block[4] = pixels[4];
205 block[5] = pixels[5];
206 block[6] = pixels[6];
207 block[7] = pixels[7];
208 pixels += line_size;
209 block += 8;
de6d9b64
FB
210 }
211}
212
eb4b3dd3
ZK
213static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
214 const UINT8 *s2, int stride){
9dbcbd92
MN
215 int i;
216
217 /* read the pixels */
9dbcbd92 218 for(i=0;i<8;i++) {
c13e1abd
FH
219 block[0] = s1[0] - s2[0];
220 block[1] = s1[1] - s2[1];
221 block[2] = s1[2] - s2[2];
222 block[3] = s1[3] - s2[3];
223 block[4] = s1[4] - s2[4];
224 block[5] = s1[5] - s2[5];
225 block[6] = s1[6] - s2[6];
226 block[7] = s1[7] - s2[7];
9dbcbd92
MN
227 s1 += stride;
228 s2 += stride;
c13e1abd 229 block += 8;
9dbcbd92
MN
230 }
231}
232
233
eb4b3dd3
ZK
234static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
235 int line_size)
de6d9b64 236{
de6d9b64
FB
237 int i;
238 UINT8 *cm = cropTbl + MAX_NEG_CROP;
239
240 /* read the pixels */
de6d9b64 241 for(i=0;i<8;i++) {
c13e1abd
FH
242 pixels[0] = cm[block[0]];
243 pixels[1] = cm[block[1]];
244 pixels[2] = cm[block[2]];
245 pixels[3] = cm[block[3]];
246 pixels[4] = cm[block[4]];
247 pixels[5] = cm[block[5]];
248 pixels[6] = cm[block[6]];
249 pixels[7] = cm[block[7]];
250
251 pixels += line_size;
252 block += 8;
de6d9b64
FB
253 }
254}
255
eb4b3dd3 256static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
c13e1abd 257 int line_size)
de6d9b64 258{
de6d9b64
FB
259 int i;
260 UINT8 *cm = cropTbl + MAX_NEG_CROP;
261
262 /* read the pixels */
de6d9b64 263 for(i=0;i<8;i++) {
c13e1abd
FH
264 pixels[0] = cm[pixels[0] + block[0]];
265 pixels[1] = cm[pixels[1] + block[1]];
266 pixels[2] = cm[pixels[2] + block[2]];
267 pixels[3] = cm[pixels[3] + block[3]];
268 pixels[4] = cm[pixels[4] + block[4]];
269 pixels[5] = cm[pixels[5] + block[5]];
270 pixels[6] = cm[pixels[6] + block[6]];
271 pixels[7] = cm[pixels[7] + block[7]];
272 pixels += line_size;
273 block += 8;
de6d9b64
FB
274 }
275}
59fe111e
MN
276#if 0
277
/*
 * 64-bit variant of PIXOP2. This branch sits inside "#if 0" and is therefore
 * not compiled at present; it is kept as the 64-bit counterpart of the
 * 32-bit implementation in the #else branch.
 *
 *   OPNAME  prefix pasted onto every generated function name
 *   OP      two-argument macro OP(dst_lvalue, value) performing the store
 *
 * Eight packed bytes are processed per 64-bit word:
 *   (a&b) + (((a^b)&0xFE..FE)>>1)   per-byte average, rounded down (no_rnd)
 *   (a|b) - (((a^b)&0xFE..FE)>>1)   per-byte average, rounded up
 * The xy2 routines average four neighbours; the low two bits (l0/l1) and the
 * pre-shifted high six bits (h0/h1) of every byte are summed separately so
 * that no carry crosses a byte boundary. The bias folded into l0 is 0x02 per
 * byte for the rounding routine and 0x01 per byte for the no_rnd one. They
 * emit two rows per loop iteration, so h is expected to be even.
 *
 * The plain copy routine is named OPNAME_pixels_c so that it matches the
 * name referenced by the first CALL_2X_PIXELS line at the end of the macro.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), LD64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* Store operation for the avg_* family: per-byte rounded-up average of the
 * current destination word and the new value, eight bytes at a time. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else /* 32 bit variant; the disabled branch above is the 64 bit variant */
419
/*
 * 32-bit variant of PIXOP2: expands to the family of 8- and 16-pixel-wide
 * block copy / averaging routines for one store operation.
 *
 *   OPNAME  prefix pasted onto every generated function name
 *   OP      two-argument macro OP(dst_lvalue, value) performing the store
 *
 * Four packed bytes are processed per 32-bit word. Sources are read through
 * LD32, destinations are accessed through uint32_t casts.
 *   (p&q) + (((p^q)&0xFEFEFEFE)>>1)   per-byte average, rounded down (no_rnd)
 *   (p|q) - (((p^q)&0xFEFEFEFE)>>1)   per-byte average, rounded up
 * The four-input routines (_l4, _xy2) sum the low two bits (l0/l1) and the
 * pre-shifted high six bits (h0/h1) of every byte separately so that no carry
 * crosses a byte boundary. The bias folded into l0 is 0x02 per byte for the
 * rounding routines and 0x01 per byte for the no_rnd ones.
 *
 * Generated helpers:
 *   _pixels8_c / _no_rnd_pixels8_c   plain 8-wide row copy through OP
 *   _pixels8_l2 / _pixels16_l2       average of two source blocks
 *   _pixels8_l4 / _pixels16_l4       average of four source blocks
 *   _pixels8_x2_c, _y2_c             l2 of a block and its right / lower
 *                                    neighbour
 *   _pixels8_xy2_c                   average of the 2x2 neighbourhood; emits
 *                                    two rows per iteration and rewinds by
 *                                    exactly h rows between the two 4-pixel
 *                                    halves, so h is expected to be even
 * plus the no_rnd counterparts, and the 16-wide *_c entry points produced by
 * CALL_2X_PIXELS. Note that _no_rnd_pixels16_c is built from _pixels8_c.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int row;\
    for (row = 0; row < h; row++) {\
        OP(*((uint32_t *)(block    )), LD32(pixels    ));\
        OP(*((uint32_t *)(block + 4)), LD32(pixels + 4));\
        block  += line_size;\
        pixels += line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int row, x;\
    for (row = 0; row < h; row++) {\
        for (x = 0; x < 8; x += 4) {\
            const uint32_t p = LD32(&src1[row * src_stride1 + x]);\
            const uint32_t q = LD32(&src2[row * src_stride2 + x]);\
            OP(*((uint32_t *)&dst[row * dst_stride + x]), (p & q) + (((p ^ q) & 0xFEFEFEFEUL) >> 1));\
        }\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int row, x;\
    for (row = 0; row < h; row++) {\
        for (x = 0; x < 8; x += 4) {\
            const uint32_t p = LD32(&src1[row * src_stride1 + x]);\
            const uint32_t q = LD32(&src2[row * src_stride2 + x]);\
            OP(*((uint32_t *)&dst[row * dst_stride + x]), (p | q) - (((p ^ q) & 0xFEFEFEFEUL) >> 1));\
        }\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst    , src1    , src2    , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst    , src1    , src2    , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels + 1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels + 1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels + line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels + line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int row, x;\
    for (row = 0; row < h; row++) {\
        for (x = 0; x < 8; x += 4) {\
            const uint32_t a = LD32(&src1[row * src_stride1 + x]);\
            const uint32_t b = LD32(&src2[row * src_stride2 + x]);\
            const uint32_t c = LD32(&src3[row * src_stride3 + x]);\
            const uint32_t d = LD32(&src4[row * src_stride4 + x]);\
            const uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;\
            const uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
            const uint32_t l1 = (c & 0x03030303UL) + (d & 0x03030303UL);\
            const uint32_t h1 = ((c & 0xFCFCFCFCUL) >> 2) + ((d & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)&dst[row * dst_stride + x]), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
        }\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int row, x;\
    for (row = 0; row < h; row++) {\
        for (x = 0; x < 8; x += 4) {\
            const uint32_t a = LD32(&src1[row * src_stride1 + x]);\
            const uint32_t b = LD32(&src2[row * src_stride2 + x]);\
            const uint32_t c = LD32(&src3[row * src_stride3 + x]);\
            const uint32_t d = LD32(&src4[row * src_stride4 + x]);\
            const uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;\
            const uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
            const uint32_t l1 = (c & 0x03030303UL) + (d & 0x03030303UL);\
            const uint32_t h1 = ((c & 0xFCFCFCFCUL) >> 2) + ((d & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)&dst[row * dst_stride + x]), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
        }\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst    , src1    , src2    , src3    , src4    , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst + 8, src1 + 8, src2 + 8, src3 + 8, src4 + 8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst    , src1    , src2    , src3    , src4    , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst + 8, src1 + 8, src2 + 8, src3 + 8, src4 + 8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int half;\
    for (half = 0; half < 2; half++) {\
        int row;\
        uint32_t a = LD32(pixels    );\
        uint32_t b = LD32(pixels + 1);\
        uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;\
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
        uint32_t l1, h1;\
\
        pixels += line_size;\
        for (row = 0; row < h; row += 2) {\
            a  = LD32(pixels    );\
            b  = LD32(pixels + 1);\
            l1 = (a & 0x03030303UL) + (b & 0x03030303UL);\
            h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
            a  = LD32(pixels    );\
            b  = LD32(pixels + 1);\
            l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;\
            h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
        }\
        pixels += 4 - line_size * (h + 1);\
        block  += 4 - line_size * h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int half;\
    for (half = 0; half < 2; half++) {\
        int row;\
        uint32_t a = LD32(pixels    );\
        uint32_t b = LD32(pixels + 1);\
        uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;\
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
        uint32_t l1, h1;\
\
        pixels += line_size;\
        for (row = 0; row < h; row += 2) {\
            a  = LD32(pixels    );\
            b  = LD32(pixels + 1);\
            l1 = (a & 0x03030303UL) + (b & 0x03030303UL);\
            h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
            a  = LD32(pixels    );\
            b  = LD32(pixels + 1);\
            l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;\
            h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
        }\
        pixels += 4 - line_size * (h + 1);\
        block  += 4 - line_size * h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels8_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c    , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)

59fe111e
MN
/*
 * Store operation for the avg_* family (32-bit words): replaces a with the
 * per-byte average of a and b, rounded up, for four packed bytes at once.
 * (a|b) - ((a^b)>>1) equals the rounded-up mean; masking with 0xFE per byte
 * before the shift keeps bits from leaking into the neighbouring byte.
 * Both arguments are evaluated more than once.
 */
#define op_avg(a, b) \
    a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
663#endif
59fe111e
MN
664#define op_put(a, b) a = b
665
666PIXOP2(avg, op_avg)
667PIXOP2(put, op_put)
668#undef op_avg
669#undef op_put
670
de6d9b64
FB
/*
 * Rounded averages of two / four values: (sum + count/2) >> log2(count).
 * Every argument is parenthesized so that operands written with
 * lower-precedence operators (shifts, bitwise operators, ?:) are grouped the
 * way the caller wrote them. Each argument is evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
673
073b013d 674
b3184779 675static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder)
44eb4951
MN
676{
677 const int A=(16-x16)*(16-y16);
678 const int B=( x16)*(16-y16);
679 const int C=(16-x16)*( y16);
680 const int D=( x16)*( y16);
681 int i;
44eb4951
MN
682
683 for(i=0; i<h; i++)
684 {
b3184779
MN
685 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
686 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
687 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
688 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
689 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
690 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
691 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
692 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
693 dst+= stride;
694 src+= stride;
44eb4951
MN
695 }
696}
697
073b013d
MN
698static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
699 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
700{
701 int y, vx, vy;
702 const int s= 1<<shift;
703
704 width--;
705 height--;
706
707 for(y=0; y<h; y++){
708 int x;
709
710 vx= ox;
711 vy= oy;
712 for(x=0; x<8; x++){ //XXX FIXME optimize
713 int src_x, src_y, frac_x, frac_y, index;
714
715 src_x= vx>>16;
716 src_y= vy>>16;
717 frac_x= src_x&(s-1);
718 frac_y= src_y&(s-1);
719 src_x>>=shift;
720 src_y>>=shift;
721
722 if((unsigned)src_x < width){
723 if((unsigned)src_y < height){
724 index= src_x + src_y*stride;
725 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
726 + src[index +1]* frac_x )*(s-frac_y)
727 + ( src[index+stride ]*(s-frac_x)
728 + src[index+stride+1]* frac_x )* frac_y
729 + r)>>(shift*2);
730 }else{
731 index= src_x + clip(src_y, 0, height)*stride;
732 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
733 + src[index +1]* frac_x )*s
734 + r)>>(shift*2);
735 }
736 }else{
737 if((unsigned)src_y < height){
738 index= clip(src_x, 0, width) + src_y*stride;
739 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
740 + src[index+stride ]* frac_y )*s
741 + r)>>(shift*2);
742 }else{
743 index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
744 dst[y*stride + x]= src[index ];
745 }
746 }
747
748 vx+= dxx;
749 vy+= dyx;
750 }
751 ox += dxy;
752 oy += dyy;
753 }
754}
755
b3184779 756static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
44eb4951 757{
44eb4951
MN
758 int i;
759 for(i=0; i<h; i++)
760 {
b3184779
MN
761 ST32(dst , LD32(src ));
762 ST32(dst+4 , LD32(src+4 ));
763 ST32(dst+8 , LD32(src+8 ));
764 ST32(dst+12, LD32(src+12));
765 dst[16]= src[16];
44eb4951
MN
766 dst+=dstStride;
767 src+=srcStride;
768 }
769}
770
b3184779 771static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
44eb4951
MN
772{
773 int i;
b3184779 774 for(i=0; i<h; i++)
44eb4951 775 {
b3184779
MN
776 ST32(dst , LD32(src ));
777 ST32(dst+4 , LD32(src+4 ));
778 dst[8]= src[8];
44eb4951
MN
779 dst+=dstStride;
780 src+=srcStride;
781 }
782}
783
826f429a 784
b3184779
MN
785#define QPEL_MC(r, OPNAME, RND, OP) \
786static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
787 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
788 int i;\
789 for(i=0; i<h; i++)\
790 {\
791 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
792 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
793 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
794 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
795 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
796 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
797 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
798 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
799 dst+=dstStride;\
800 src+=srcStride;\
801 }\
44eb4951
MN
802}\
803\
b3184779
MN
804static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
805 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
806 int i;\
807 for(i=0; i<w; i++)\
808 {\
809 const int src0= src[0*srcStride];\
810 const int src1= src[1*srcStride];\
811 const int src2= src[2*srcStride];\
812 const int src3= src[3*srcStride];\
813 const int src4= src[4*srcStride];\
814 const int src5= src[5*srcStride];\
815 const int src6= src[6*srcStride];\
816 const int src7= src[7*srcStride];\
817 const int src8= src[8*srcStride];\
818 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
819 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
820 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
821 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
822 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
823 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
824 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
825 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
826 dst++;\
827 src++;\
828 }\
829}\
830\
831static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
832 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
833 int i;\
826f429a 834 \
b3184779
MN
835 for(i=0; i<h; i++)\
836 {\
837 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
838 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
839 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
840 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
841 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
842 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
843 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
844 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
845 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
846 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
847 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
848 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
849 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
850 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
851 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
852 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
853 dst+=dstStride;\
854 src+=srcStride;\
855 }\
856}\
857\
826f429a 858static void OPNAME ## mpeg4_qpel16_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\
b3184779
MN
859 UINT8 *cm = cropTbl + MAX_NEG_CROP;\
860 int i;\
826f429a 861 const int w=16;\
b3184779
MN
862 for(i=0; i<w; i++)\
863 {\
864 const int src0= src[0*srcStride];\
865 const int src1= src[1*srcStride];\
866 const int src2= src[2*srcStride];\
867 const int src3= src[3*srcStride];\
868 const int src4= src[4*srcStride];\
869 const int src5= src[5*srcStride];\
870 const int src6= src[6*srcStride];\
871 const int src7= src[7*srcStride];\
872 const int src8= src[8*srcStride];\
873 const int src9= src[9*srcStride];\
874 const int src10= src[10*srcStride];\
875 const int src11= src[11*srcStride];\
876 const int src12= src[12*srcStride];\
877 const int src13= src[13*srcStride];\
878 const int src14= src[14*srcStride];\
879 const int src15= src[15*srcStride];\
880 const int src16= src[16*srcStride];\
881 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
882 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
883 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
884 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
885 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
886 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
887 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
888 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
889 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
890 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
891 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
892 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
893 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
894 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
895 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
896 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
897 dst++;\
898 src++;\
899 }\
900}\
901\
902static void OPNAME ## qpel8_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
45553457 903 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
904}\
905\
906static void OPNAME ## qpel8_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951 907 UINT8 half[64];\
b3184779
MN
908 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
909 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
910}\
911\
b3184779
MN
912static void OPNAME ## qpel8_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
913 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
914}\
915\
b3184779 916static void OPNAME ## qpel8_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951 917 UINT8 half[64];\
b3184779
MN
918 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
919 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
920}\
921\
b3184779
MN
922static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
923 UINT8 full[16*9];\
44eb4951 924 UINT8 half[64];\
b3184779
MN
925 copy_block9(full, src, 16, stride, 9);\
926 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
927 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
928}\
929\
b3184779
MN
930static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
931 UINT8 full[16*9];\
932 copy_block9(full, src, 16, stride, 9);\
933 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16, 8);\
44eb4951
MN
934}\
935\
b3184779
MN
936static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
937 UINT8 full[16*9];\
44eb4951 938 UINT8 half[64];\
b3184779
MN
939 copy_block9(full, src, 16, stride, 9);\
940 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
941 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 942}\
b3184779
MN
943static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
944 UINT8 full[16*9];\
44eb4951 945 UINT8 halfH[72];\
7ff037e9 946 UINT8 halfV[64];\
44eb4951 947 UINT8 halfHV[64];\
b3184779
MN
948 copy_block9(full, src, 16, stride, 9);\
949 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
950 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
951 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
952 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 953}\
b3184779
MN
954static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
955 UINT8 full[16*9];\
44eb4951 956 UINT8 halfH[72];\
7ff037e9 957 UINT8 halfV[64];\
44eb4951 958 UINT8 halfHV[64];\
b3184779
MN
959 copy_block9(full, src, 16, stride, 9);\
960 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
961 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
962 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
963 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 964}\
b3184779
MN
965static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
966 UINT8 full[16*9];\
44eb4951 967 UINT8 halfH[72];\
7ff037e9 968 UINT8 halfV[64];\
44eb4951 969 UINT8 halfHV[64];\
b3184779
MN
970 copy_block9(full, src, 16, stride, 9);\
971 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
972 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
973 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
974 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 975}\
b3184779
MN
976static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
977 UINT8 full[16*9];\
44eb4951 978 UINT8 halfH[72];\
7ff037e9 979 UINT8 halfV[64];\
44eb4951 980 UINT8 halfHV[64];\
b3184779
MN
981 copy_block9(full, src, 16, stride, 9);\
982 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
983 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
984 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
985 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 986}\
b3184779 987static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951
MN
988 UINT8 halfH[72];\
989 UINT8 halfHV[64];\
b3184779
MN
990 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
991 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
992 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 993}\
b3184779 994static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951
MN
995 UINT8 halfH[72];\
996 UINT8 halfHV[64];\
b3184779
MN
997 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
998 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
999 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1000}\
b3184779
MN
1001static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1002 UINT8 full[16*9];\
44eb4951 1003 UINT8 halfH[72];\
7ff037e9 1004 UINT8 halfV[64];\
44eb4951 1005 UINT8 halfHV[64];\
b3184779
MN
1006 copy_block9(full, src, 16, stride, 9);\
1007 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1008 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
1009 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1010 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1011}\
b3184779
MN
1012static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1013 UINT8 full[16*9];\
44eb4951 1014 UINT8 halfH[72];\
7ff037e9 1015 UINT8 halfV[64];\
44eb4951 1016 UINT8 halfHV[64];\
b3184779
MN
1017 copy_block9(full, src, 16, stride, 9);\
1018 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1019 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
1020 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1021 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1022}\
b3184779 1023static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
44eb4951 1024 UINT8 halfH[72];\
b3184779
MN
1025 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1026 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8, 8);\
1027}\
1028static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
45553457 1029 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1030}\
1031\
1032static void OPNAME ## qpel16_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
1033 UINT8 half[256];\
1034 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1035 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1036}\
1037\
1038static void OPNAME ## qpel16_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
1039 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1040}\
b3184779
MN
1041\
1042static void OPNAME ## qpel16_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
1043 UINT8 half[256];\
1044 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1045 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1046}\
1047\
1048static void OPNAME ## qpel16_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
1049 UINT8 full[24*17];\
1050 UINT8 half[256];\
1051 copy_block17(full, src, 24, stride, 17);\
826f429a 1052 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1053 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1054}\
1055\
1056static void OPNAME ## qpel16_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
1057 UINT8 full[24*17];\
1058 copy_block17(full, src, 24, stride, 17);\
826f429a 1059 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1060}\
1061\
1062static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
1063 UINT8 full[24*17];\
1064 UINT8 half[256];\
1065 copy_block17(full, src, 24, stride, 17);\
826f429a 1066 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1067 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1068}\
1069static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
1070 UINT8 full[24*17];\
1071 UINT8 halfH[272];\
1072 UINT8 halfV[256];\
1073 UINT8 halfHV[256];\
1074 copy_block17(full, src, 24, stride, 17);\
1075 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1076 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1077 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1078 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1079}\
1080static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
1081 UINT8 full[24*17];\
1082 UINT8 halfH[272];\
1083 UINT8 halfV[256];\
1084 UINT8 halfHV[256];\
1085 copy_block17(full, src, 24, stride, 17);\
1086 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1087 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1088 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1089 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1090}\
1091static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
1092 UINT8 full[24*17];\
1093 UINT8 halfH[272];\
1094 UINT8 halfV[256];\
1095 UINT8 halfHV[256];\
1096 copy_block17(full, src, 24, stride, 17);\
1097 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1098 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1099 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1100 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1101}\
1102static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
1103 UINT8 full[24*17];\
1104 UINT8 halfH[272];\
1105 UINT8 halfV[256];\
1106 UINT8 halfHV[256];\
1107 copy_block17(full, src, 24, stride, 17);\
1108 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1109 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1110 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1111 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1112}\
1113static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
1114 UINT8 halfH[272];\
1115 UINT8 halfHV[256];\
1116 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1117 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1118 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1119}\
1120static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
1121 UINT8 halfH[272];\
1122 UINT8 halfHV[256];\
1123 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1124 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1125 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1126}\
1127static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1128 UINT8 full[24*17];\
1129 UINT8 halfH[272];\
1130 UINT8 halfV[256];\
1131 UINT8 halfHV[256];\
1132 copy_block17(full, src, 24, stride, 17);\
1133 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1134 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1136 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1137}\
1138static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1139 UINT8 full[24*17];\
1140 UINT8 halfH[272];\
1141 UINT8 halfV[256];\
1142 UINT8 halfHV[256];\
1143 copy_block17(full, src, 24, stride, 17);\
1144 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1145 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1146 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1147 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1148}\
1149static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
1150 UINT8 halfH[272];\
1151 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1152 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1153}
44eb4951 1154
b3184779
MN
1155#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1156#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1157#define op_put(a, b) a = cm[((b) + 16)>>5]
1158#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1159
1160QPEL_MC(0, put_ , _ , op_put)
1161QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1162QPEL_MC(0, avg_ , _ , op_avg)
1163//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1164#undef op_avg
1165#undef op_avg_no_rnd
1166#undef op_put
1167#undef op_put_no_rnd
44eb4951 1168
1457ab52
MN
1169static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1170 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1171 int i;
1172
1173 for(i=0; i<h; i++){
1174 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1175 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1176 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1177 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1178 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1179 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1180 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1181 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1182 dst+=dstStride;
1183 src+=srcStride;
1184 }
1185}
1186
1187static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1188 uint8_t *cm = cropTbl + MAX_NEG_CROP;
1189 int i;
1190
1191 for(i=0; i<w; i++){
1192 const int src_1= src[ -srcStride];
1193 const int src0 = src[0 ];
1194 const int src1 = src[ srcStride];
1195 const int src2 = src[2*srcStride];
1196 const int src3 = src[3*srcStride];
1197 const int src4 = src[4*srcStride];
1198 const int src5 = src[5*srcStride];
1199 const int src6 = src[6*srcStride];
1200 const int src7 = src[7*srcStride];
1201 const int src8 = src[8*srcStride];
1202 const int src9 = src[9*srcStride];
1203 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1204 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1205 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1206 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1207 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1208 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1209 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1210 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1211 src++;
1212 dst++;
1213 }
1214}
1215
/**
 * mspel position (0,0): no filtering, the 8x8 block is handed straight to
 * put_pixels8_c (defined elsewhere in this file) with a height of 8.
 */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
1219
/**
 * mspel position (1,0): combines the source block with its horizontally
 * filtered version.
 * The filtered 8x8 block goes into a packed scratch buffer (stride 8) and is
 * merged with src by put_pixels8_l2 (defined elsewhere; presumably a rounded
 * average of its two inputs - confirm).
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* 8 rows x 8 pixels */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
1225
/**
 * mspel position (2,0): the horizontally filtered 8x8 block is written
 * directly to dst; source and destination share the same stride.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1229
/**
 * mspel position (3,0): like mc10, but the horizontally filtered block is
 * merged with the source shifted one pixel to the right (src+1).
 * put_pixels8_l2 is defined elsewhere; presumably a rounded average of its
 * two inputs - confirm.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* 8 rows x 8 pixels */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}
1235
/**
 * mspel position (0,2): the vertically filtered 8x8 block is written
 * directly to dst.  The filter reads one row above and two rows below the
 * 8 source rows.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1239
/**
 * mspel position (1,2): merges the vertically filtered source with the
 * horizontally-then-vertically filtered source.
 *
 * halfH holds 11 horizontally filtered rows starting one row above src
 * (rows -1 .. 9), which is exactly what the vertical filter needs when it is
 * started at halfH+8 (row 0).  put_pixels8_l2 is defined elsewhere;
 * presumably a rounded average of its two inputs - confirm.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows x 8 pixels */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel position (3,2): same as mc12 except that the purely vertical
 * component is taken from the source shifted one pixel to the right (src+1).
 *
 * halfH holds 11 horizontally filtered rows starting one row above src, so
 * the vertical filter can be started at halfH+8.  put_pixels8_l2 is defined
 * elsewhere; presumably a rounded average of its two inputs - confirm.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows x 8 pixels */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel position (2,2): horizontal filter followed by vertical filter.
 * 11 rows (one above, two below the block) are filtered horizontally into a
 * packed scratch buffer; the vertical pass starts at its second row and
 * writes the final 8x8 block to dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows x 8 pixels */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1263
1264
1265static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1266{
1267 int s, i;
1268
1269 s = 0;
ba6802de 1270 for(i=0;i<16;i++) {
de6d9b64
FB
1271 s += abs(pix1[0] - pix2[0]);
1272 s += abs(pix1[1] - pix2[1]);
1273 s += abs(pix1[2] - pix2[2]);
1274 s += abs(pix1[3] - pix2[3]);
1275 s += abs(pix1[4] - pix2[4]);
1276 s += abs(pix1[5] - pix2[5]);
1277 s += abs(pix1[6] - pix2[6]);
1278 s += abs(pix1[7] - pix2[7]);
1279 s += abs(pix1[8] - pix2[8]);
1280 s += abs(pix1[9] - pix2[9]);
1281 s += abs(pix1[10] - pix2[10]);
1282 s += abs(pix1[11] - pix2[11]);
1283 s += abs(pix1[12] - pix2[12]);
1284 s += abs(pix1[13] - pix2[13]);
1285 s += abs(pix1[14] - pix2[14]);
1286 s += abs(pix1[15] - pix2[15]);
1287 pix1 += line_size;
1288 pix2 += line_size;
1289 }
1290 return s;
1291}
1292
eb4b3dd3 1293static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1294{
1295 int s, i;
1296
1297 s = 0;
ba6802de 1298 for(i=0;i<16;i++) {
de6d9b64
FB
1299 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1300 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1301 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1302 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1303 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1304 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1305 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1306 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1307 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1308 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1309 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1310 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1311 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1312 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1313 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1314 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1315 pix1 += line_size;
1316 pix2 += line_size;
1317 }
1318 return s;
1319}
1320
eb4b3dd3 1321static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1322{
1323 int s, i;
1324 UINT8 *pix3 = pix2 + line_size;
1325
1326 s = 0;
ba6802de 1327 for(i=0;i<16;i++) {
de6d9b64
FB
1328 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1329 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1330 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1331 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1332 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1333 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1334 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1335 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1336 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1337 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1338 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1339 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1340 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1341 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1342 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1343 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1344 pix1 += line_size;
1345 pix2 += line_size;
1346 pix3 += line_size;
1347 }
1348 return s;
1349}
1350
eb4b3dd3 1351static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
1352{
1353 int s, i;
1354 UINT8 *pix3 = pix2 + line_size;
1355
1356 s = 0;
ba6802de 1357 for(i=0;i<16;i++) {
de6d9b64
FB
1358 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1359 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1360 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1361 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1362 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1363 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1364 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1365 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1366 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1367 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1368 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1369 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1370 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1371 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1372 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1373 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1374 pix1 += line_size;
1375 pix2 += line_size;
1376 pix3 += line_size;
1377 }
1378 return s;
1379}
1380
1457ab52 1381static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1382{
1383 int s, i;
1384
1385 s = 0;
1386 for(i=0;i<8;i++) {
1387 s += abs(pix1[0] - pix2[0]);
1388 s += abs(pix1[1] - pix2[1]);
1389 s += abs(pix1[2] - pix2[2]);
1390 s += abs(pix1[3] - pix2[3]);
1391 s += abs(pix1[4] - pix2[4]);
1392 s += abs(pix1[5] - pix2[5]);
1393 s += abs(pix1[6] - pix2[6]);
1394 s += abs(pix1[7] - pix2[7]);
1395 pix1 += line_size;
1396 pix2 += line_size;
1397 }
1398 return s;
1399}
1400
eb4b3dd3 1401static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1402{
1403 int s, i;
1404
1405 s = 0;
1406 for(i=0;i<8;i++) {
1407 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1408 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1409 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1410 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1411 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1412 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1413 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1414 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1415 pix1 += line_size;
1416 pix2 += line_size;
1417 }
1418 return s;
1419}
1420
eb4b3dd3 1421static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1422{
1423 int s, i;
1424 UINT8 *pix3 = pix2 + line_size;
1425
1426 s = 0;
1427 for(i=0;i<8;i++) {
1428 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1429 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1430 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1431 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1432 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1433 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1434 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1435 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1436 pix1 += line_size;
1437 pix2 += line_size;
1438 pix3 += line_size;
1439 }
1440 return s;
1441}
1442
eb4b3dd3 1443static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
ba6802de
MN
1444{
1445 int s, i;
1446 UINT8 *pix3 = pix2 + line_size;
1447
1448 s = 0;
1449 for(i=0;i<8;i++) {
1450 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1451 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1452 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1453 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1454 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1455 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1456 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1457 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1458 pix1 += line_size;
1459 pix2 += line_size;
1460 pix3 += line_size;
1461 }
1462 return s;
1463}
1464
1457ab52
MN
/**
 * 16x16 SAD with the (context, a, b, stride) comparison signature.
 * The context pointer s is not used; the work is done by pix_abs16x16_c.
 */
static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){
    return pix_abs16x16_c(a,b,stride);
}
1468
/**
 * 8x8 SAD with the (context, a, b, stride) comparison signature.
 * The context pointer s is not used; the work is done by pix_abs8x8_c.
 */
static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
    return pix_abs8x8_c(a,b,stride);
}
1472
477ab036 1473void ff_block_permute(DCTELEM *block, UINT8 *permutation, const UINT8 *scantable, int last)
d962f6fd 1474{
7801d21d 1475 int i;
477ab036 1476 DCTELEM temp[64];
7801d21d
MN
1477
1478 if(last<=0) return;
9a7b310d 1479 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 1480
7801d21d
MN
1481 for(i=0; i<=last; i++){
1482 const int j= scantable[i];
1483 temp[j]= block[j];
1484 block[j]=0;
1485 }
1486
1487 for(i=0; i<=last; i++){
1488 const int j= scantable[i];
1489 const int perm_j= permutation[j];
1490 block[perm_j]= temp[j];
1491 }
d962f6fd 1492}
e0eac44e 1493
eb4b3dd3 1494static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
1495{
1496 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1497}
1498
11f18faf
MN
/**
 * Add src to dst byte by byte (modulo 256): dst[i] += src[i] for 0 <= i < w.
 *
 * The main loop handles eight bytes per iteration and therefore has to
 * advance by eight; stepping by one would add most bytes several times.
 *
 * @param dst accumulator, updated in place
 * @param src bytes to add
 * @param w   number of bytes to process
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i;
    for(i=0; i+7<w; i+=8){
        dst[i+0] += src[i+0];
        dst[i+1] += src[i+1];
        dst[i+2] += src[i+2];
        dst[i+3] += src[i+3];
        dst[i+4] += src[i+4];
        dst[i+5] += src[i+5];
        dst[i+6] += src[i+6];
        dst[i+7] += src[i+7];
    }
    /* remaining 0..7 bytes */
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
1514
/**
 * Byte-wise difference (modulo 256): dst[i] = src1[i] - src2[i] for 0 <= i < w.
 *
 * The main loop handles eight bytes per iteration and advances by eight.
 * Stepping by one recomputed every byte up to eight times, which is wasted
 * work and yields wrong results when dst is the same buffer as src1 or src2.
 *
 * @param dst  result
 * @param src1 minuend
 * @param src2 subtrahend
 * @param w    number of bytes to process
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
    for(i=0; i+7<w; i+=8){
        dst[i+0] = src1[i+0]-src2[i+0];
        dst[i+1] = src1[i+1]-src2[i+1];
        dst[i+2] = src1[i+2]-src2[i+2];
        dst[i+3] = src1[i+3]-src2[i+3];
        dst[i+4] = src1[i+4]-src2[i+4];
        dst[i+5] = src1[i+5]-src2[i+5];
        dst[i+6] = src1[i+6]-src2[i+6];
        dst[i+7] = src1[i+7]-src2[i+7];
    }
    /* remaining 0..7 bytes */
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
1530
1457ab52
MN
/*
 * Butterfly helpers for the Hadamard transforms.
 *
 * BUTTERFLY2 stores sum and difference of two inputs into two outputs,
 * BUTTERFLY1 replaces two lvalues by their sum and difference in place,
 * BUTTERFLYA yields |x+y| + |x-y| without storing anything.
 *
 * The statement macros are wrapped in do{}while(0) so they behave as a
 * single statement (e.g. under an unbraced if) and take the usual trailing
 * semicolon.  The temporaries carry a prefix so they cannot shadow a caller's
 * variable named a or b.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do{\
    o1= (i1)+(i2);\
    o2= (i1)-(i2);\
}while(0)

#define BUTTERFLY1(x,y) \
do{\
    const int bfly_a= (x);\
    const int bfly_b= (y);\
    x= bfly_a+bfly_b;\
    y= bfly_a-bfly_b;\
}while(0)

#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
1545
/**
 * Sum of absolute values of the 8x8 Hadamard transform of (src - dst).
 *
 * The transform is separable: three butterfly stages along every row, then
 * three along every column.  The last column stage is not stored; its
 * absolute outputs are accumulated directly through BUTTERFLYA.
 *
 * @param s      context pointer, unused here
 * @param dst    first 8x8 block
 * @param src    second 8x8 block
 * @param stride offset between consecutive rows, shared by both blocks
 * @return sum of the absolute transformed differences
 */
static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
    int i;
    int temp[64];
    int sum=0;

    /* row pass */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* column pass, final stage folded into the accumulation */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    return sum;
}
1595
/**
 * Sum of absolute values of the 8x8 Hadamard transform of (src - mean).
 *
 * Same separable butterfly structure as hadamard8_diff_c, but the input is a
 * single block with a constant subtracted from every pixel.
 *
 * @param src    8x8 block
 * @param stride offset between consecutive rows
 * @param mean   value subtracted from every pixel before the transform
 * @return sum of the absolute transformed values
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int i;
    int temp[64];
    int sum=0;
//FIXME OOOPS ignore 0 term instead of mean mess
    /* row pass */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* column pass, final stage folded into the accumulation */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    return sum;
}
1639
1640static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1641 MpegEncContext * const s= (MpegEncContext *)c;
1642 DCTELEM temp[64];
1643 int sum=0, i;
1644
1645 s->dsp.diff_pixels(temp, src1, src2, stride);
1646 s->fdct(temp);
1647
1648 for(i=0; i<64; i++)
1649 sum+= ABS(temp[i]);
1650
1651 return sum;
1652}
1653
1654void simple_idct(INT16 *block); //FIXME
1655
1656static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
1657 MpegEncContext * const s= (MpegEncContext *)c;
1658 DCTELEM temp[64], bak[64];
1659 int sum=0, i;
1660
1661 s->mb_intra=0;
1662
1663 s->dsp.diff_pixels(temp, src1, src2, stride);
1664
1665 memcpy(bak, temp, 64*sizeof(DCTELEM));
1666
1667 s->dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
1668 s->dct_unquantize(s, temp, 0, s->qscale);
1669 simple_idct(temp); //FIXME
1670
1671 for(i=0; i<64; i++)
1672 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
1673
1674 return sum;
1675}
1676
/* Instantiate the 16x16 comparison functions from their 8x8 counterparts.
 * The second name of each pair is what dsputil_init() installs in slot [0]
 * of the matching table, next to the 8x8 function in slot [1].
 * NOTE(review): WARPER88_1616 is defined outside this section; presumably it
 * defines the second name as a wrapper that applies the first to the 8x8
 * sub-blocks of a 16x16 block -- confirm against the macro definition. */
WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
1680
eb4b3dd3 1681void dsputil_init(DSPContext* c, unsigned mask)
e0eac44e 1682{
5abd509a 1683 static int init_done = 0;
d2975f8d 1684 int i;
e0eac44e 1685
5abd509a
ZK
1686 if (!init_done) {
1687 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
1688 for(i=0;i<MAX_NEG_CROP;i++) {
1689 cropTbl[i] = 0;
1690 cropTbl[i + MAX_NEG_CROP + 256] = 255;
1691 }
de6d9b64 1692
5abd509a
ZK
1693 for(i=0;i<512;i++) {
1694 squareTbl[i] = (i - 256) * (i - 256);
1695 }
92ddb692
ZK
1696
1697 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
1698
1699 init_done = 1;
de6d9b64
FB
1700 }
1701
eb4b3dd3
ZK
1702 c->get_pixels = get_pixels_c;
1703 c->diff_pixels = diff_pixels_c;
1704 c->put_pixels_clamped = put_pixels_clamped_c;
1705 c->add_pixels_clamped = add_pixels_clamped_c;
1706 c->gmc1 = gmc1_c;
1707 c->gmc = gmc_c;
1708 c->clear_blocks = clear_blocks_c;
1709 c->pix_sum = pix_sum_c;
1710 c->pix_norm1 = pix_norm1_c;
1457ab52
MN
1711 c->sse[0]= sse16_c;
1712 c->sse[1]= sse8_c;
eb4b3dd3 1713
45553457 1714 /* TODO [0] 16 [1] 8 */
eb4b3dd3
ZK
1715 c->pix_abs16x16 = pix_abs16x16_c;
1716 c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
1717 c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
1718 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
1719 c->pix_abs8x8 = pix_abs8x8_c;
1720 c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
1721 c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
1722 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
1723
45553457
ZK
1724#define dspfunc(PFX, IDX, NUM) \
1725 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
1726 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
1727 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
1728 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
1729
1730 dspfunc(put, 0, 16);
1731 dspfunc(put_no_rnd, 0, 16);
1732 dspfunc(put, 1, 8);
1733 dspfunc(put_no_rnd, 1, 8);
1734
1735 dspfunc(avg, 0, 16);
1736 dspfunc(avg_no_rnd, 0, 16);
1737 dspfunc(avg, 1, 8);
1738 dspfunc(avg_no_rnd, 1, 8);
1739#undef dspfunc
1740
1741#define dspfunc(PFX, IDX, NUM) \
1742 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
1743 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
1744 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
1745 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
1746 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
1747 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
1748 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
1749 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
1750 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
1751 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
1752 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
1753 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
1754 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
1755 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
1756 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
1757 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
1758
1759 dspfunc(put_qpel, 0, 16);
1760 dspfunc(put_no_rnd_qpel, 0, 16);
1761
1762 dspfunc(avg_qpel, 0, 16);
1763 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
1764
1765 dspfunc(put_qpel, 1, 8);
1766 dspfunc(put_no_rnd_qpel, 1, 8);
1767
1768 dspfunc(avg_qpel, 1, 8);
1769 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
1770#undef dspfunc
c9a2ebc4 1771
1457ab52
MN
1772 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
1773 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
1774 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
1775 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
1776 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
1777 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
1778 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
1779 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
1780
1781 c->hadamard8_diff[0]= hadamard8_diff16_c;
1782 c->hadamard8_diff[1]= hadamard8_diff_c;
1783 c->hadamard8_abs = hadamard8_abs_c;
1784
1785 c->dct_sad[0]= dct_sad16x16_c;
1786 c->dct_sad[1]= dct_sad8x8_c;
1787
1788 c->sad[0]= sad16x16_c;
1789 c->sad[1]= sad8x8_c;
1790
1791 c->quant_psnr[0]= quant_psnr16x16_c;
1792 c->quant_psnr[1]= quant_psnr8x8_c;
1793
11f18faf
MN
1794 c->add_bytes= add_bytes_c;
1795 c->diff_bytes= diff_bytes_c;
1796
980fc7b8 1797#ifdef HAVE_MMX
eb4b3dd3 1798 dsputil_init_mmx(c, mask);
34dfe896
ZK
1799 if (ff_bit_exact)
1800 {
1801 /* FIXME - AVCodec context should have flag for bitexact match */
1802 /* fprintf(stderr, "\n\n\nff_bit_exact %d\n\n\n\n", ff_bit_exact); */
1803 dsputil_set_bit_exact_mmx(c, mask);
1804 }
de6d9b64 1805#endif
3d03c0a2 1806#ifdef ARCH_ARMV4L
eb4b3dd3 1807 dsputil_init_armv4l(c, mask);
3d03c0a2 1808#endif
c34270f5 1809#ifdef HAVE_MLIB
eb4b3dd3 1810 dsputil_init_mlib(c, mask);
c34270f5 1811#endif
1e98dffb 1812#ifdef ARCH_ALPHA
eb4b3dd3 1813 dsputil_init_alpha(c, mask);
1e98dffb 1814#endif
59925ef2 1815#ifdef ARCH_POWERPC
eb4b3dd3 1816 dsputil_init_ppc(c, mask);
a43bd1d7 1817#endif
d46aba26 1818#ifdef HAVE_MMI
eb4b3dd3 1819 dsputil_init_mmi(c, mask);
d46aba26 1820#endif
de6d9b64 1821}
43f1708f 1822
57060b1e
FB
1823/* remove any non bit exact operation (testing purpose) */
1824void avcodec_set_bit_exact(void)
1825{
5596c60c 1826 ff_bit_exact=1;
57060b1e 1827#ifdef HAVE_MMX
34dfe896 1828// FIXME - better set_bit_exact
eb4b3dd3 1829// dsputil_set_bit_exact_mmx();
57060b1e
FB
1830#endif
1831}