simplify
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
ff4ec49e 3 * Copyright (c) 2000, 2001 Fabrice Bellard.
de6d9b64 4 *
ff4ec49e
FB
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
de6d9b64 9 *
ff4ec49e 10 * This library is distributed in the hope that it will be useful,
de6d9b64 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
de6d9b64 14 *
ff4ec49e
FB
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7ff037e9 18 *
59fe111e 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
de6d9b64 20 */
983e3246
MN
21
22/**
23 * @file dsputil.c
24 * DSP utils
25 */
26
de6d9b64
FB
27#include "avcodec.h"
28#include "dsputil.h"
1457ab52 29#include "mpegvideo.h"
b0368839 30#include "simple_idct.h"
65e4c8c9 31#include "faandct.h"
5596c60c 32
0c1a9eda
ZK
33uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
34uint32_t squareTbl[512];
de6d9b64 35
/* Scan table: entry i is the raster position (row * 8 + column) of the
 * i-th coefficient of an 8x8 block. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
46
/* Specific zigzag scan for the 248 IDCT. NOTE that, unlike the
   specification, we interleave the fields. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
59
2f349de2 60/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
0c1a9eda 61uint16_t __align8 inv_zigzag_direct16[64];
2f349de2 62
/* Alternate (horizontal) coefficient scan order for 8x8 blocks; entry i is
 * the raster position of the i-th scanned coefficient. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
73
/* Alternate (vertical) coefficient scan order for 8x8 blocks; entry i is
 * the raster position of the i-th scanned coefficient. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
84
/* Reciprocal table for division by multiplication:
 * a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
 * (the product has to be formed in 64 bits). */
const uint32_t inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
120
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
132
/**
 * Sum of all samples of a 16x16 block.
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between the starts of two rows
 * @return the sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}
154
0c1a9eda 155static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
156{
157 int s, i, j;
0c1a9eda 158 uint32_t *sq = squareTbl + 256;
3aa102be
MN
159
160 s = 0;
161 for (i = 0; i < 16; i++) {
162 for (j = 0; j < 16; j += 8) {
2a006cd3 163#if 0
3aa102be
MN
164 s += sq[pix[0]];
165 s += sq[pix[1]];
166 s += sq[pix[2]];
167 s += sq[pix[3]];
168 s += sq[pix[4]];
169 s += sq[pix[5]];
170 s += sq[pix[6]];
171 s += sq[pix[7]];
2a006cd3
FL
172#else
173#if LONG_MAX > 2147483647
174 register uint64_t x=*(uint64_t*)pix;
175 s += sq[x&0xff];
176 s += sq[(x>>8)&0xff];
177 s += sq[(x>>16)&0xff];
178 s += sq[(x>>24)&0xff];
179 s += sq[(x>>32)&0xff];
180 s += sq[(x>>40)&0xff];
181 s += sq[(x>>48)&0xff];
182 s += sq[(x>>56)&0xff];
183#else
184 register uint32_t x=*(uint32_t*)pix;
185 s += sq[x&0xff];
186 s += sq[(x>>8)&0xff];
187 s += sq[(x>>16)&0xff];
188 s += sq[(x>>24)&0xff];
189 x=*(uint32_t*)(pix+4);
190 s += sq[x&0xff];
191 s += sq[(x>>8)&0xff];
192 s += sq[(x>>16)&0xff];
193 s += sq[(x>>24)&0xff];
194#endif
195#endif
3aa102be
MN
196 pix += 8;
197 }
198 pix += line_size - 16;
199 }
200 return s;
201}
202
/**
 * Apply bswap_32() to each of the w 32 bit words of src, in ascending order,
 * and store the results in dst.
 * @param dst destination, room for w words
 * @param src source words
 * @param w   number of words; nothing is written if w <= 0
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int i;

    for(i=0; i<w; i++)
        dst[i]= bswap_32(src[i]);
}
3aa102be 220
bb198e19 221static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
222{
223 int s, i;
0c1a9eda 224 uint32_t *sq = squareTbl + 256;
1457ab52
MN
225
226 s = 0;
bb198e19 227 for (i = 0; i < h; i++) {
1457ab52
MN
228 s += sq[pix1[0] - pix2[0]];
229 s += sq[pix1[1] - pix2[1]];
230 s += sq[pix1[2] - pix2[2]];
231 s += sq[pix1[3] - pix2[3]];
232 s += sq[pix1[4] - pix2[4]];
233 s += sq[pix1[5] - pix2[5]];
234 s += sq[pix1[6] - pix2[6]];
235 s += sq[pix1[7] - pix2[7]];
236 pix1 += line_size;
237 pix2 += line_size;
238 }
239 return s;
240}
241
bb198e19 242static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 243{
6b026927
FH
244 int s, i;
245 uint32_t *sq = squareTbl + 256;
9c76bd48
BF
246
247 s = 0;
bb198e19 248 for (i = 0; i < h; i++) {
6b026927
FH
249 s += sq[pix1[ 0] - pix2[ 0]];
250 s += sq[pix1[ 1] - pix2[ 1]];
251 s += sq[pix1[ 2] - pix2[ 2]];
252 s += sq[pix1[ 3] - pix2[ 3]];
253 s += sq[pix1[ 4] - pix2[ 4]];
254 s += sq[pix1[ 5] - pix2[ 5]];
255 s += sq[pix1[ 6] - pix2[ 6]];
256 s += sq[pix1[ 7] - pix2[ 7]];
257 s += sq[pix1[ 8] - pix2[ 8]];
258 s += sq[pix1[ 9] - pix2[ 9]];
259 s += sq[pix1[10] - pix2[10]];
260 s += sq[pix1[11] - pix2[11]];
261 s += sq[pix1[12] - pix2[12]];
262 s += sq[pix1[13] - pix2[13]];
263 s += sq[pix1[14] - pix2[14]];
264 s += sq[pix1[15] - pix2[15]];
2a006cd3 265
6b026927
FH
266 pix1 += line_size;
267 pix2 += line_size;
9c76bd48
BF
268 }
269 return s;
270}
271
0c1a9eda 272static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 273{
de6d9b64
FB
274 int i;
275
276 /* read the pixels */
de6d9b64 277 for(i=0;i<8;i++) {
c13e1abd
FH
278 block[0] = pixels[0];
279 block[1] = pixels[1];
280 block[2] = pixels[2];
281 block[3] = pixels[3];
282 block[4] = pixels[4];
283 block[5] = pixels[5];
284 block[6] = pixels[6];
285 block[7] = pixels[7];
286 pixels += line_size;
287 block += 8;
de6d9b64
FB
288 }
289}
290
0c1a9eda
ZK
291static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
292 const uint8_t *s2, int stride){
9dbcbd92
MN
293 int i;
294
295 /* read the pixels */
9dbcbd92 296 for(i=0;i<8;i++) {
c13e1abd
FH
297 block[0] = s1[0] - s2[0];
298 block[1] = s1[1] - s2[1];
299 block[2] = s1[2] - s2[2];
300 block[3] = s1[3] - s2[3];
301 block[4] = s1[4] - s2[4];
302 block[5] = s1[5] - s2[5];
303 block[6] = s1[6] - s2[6];
304 block[7] = s1[7] - s2[7];
9dbcbd92
MN
305 s1 += stride;
306 s2 += stride;
c13e1abd 307 block += 8;
9dbcbd92
MN
308 }
309}
310
311
0c1a9eda 312static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
eb4b3dd3 313 int line_size)
de6d9b64 314{
de6d9b64 315 int i;
0c1a9eda 316 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
317
318 /* read the pixels */
de6d9b64 319 for(i=0;i<8;i++) {
c13e1abd
FH
320 pixels[0] = cm[block[0]];
321 pixels[1] = cm[block[1]];
322 pixels[2] = cm[block[2]];
323 pixels[3] = cm[block[3]];
324 pixels[4] = cm[block[4]];
325 pixels[5] = cm[block[5]];
326 pixels[6] = cm[block[6]];
327 pixels[7] = cm[block[7]];
328
329 pixels += line_size;
330 block += 8;
de6d9b64
FB
331 }
332}
333
0c1a9eda 334static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 335 int line_size)
de6d9b64 336{
de6d9b64 337 int i;
0c1a9eda 338 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
339
340 /* read the pixels */
de6d9b64 341 for(i=0;i<8;i++) {
c13e1abd
FH
342 pixels[0] = cm[pixels[0] + block[0]];
343 pixels[1] = cm[pixels[1] + block[1]];
344 pixels[2] = cm[pixels[2] + block[2]];
345 pixels[3] = cm[pixels[3] + block[3]];
346 pixels[4] = cm[pixels[4] + block[4]];
347 pixels[5] = cm[pixels[5] + block[5]];
348 pixels[6] = cm[pixels[6] + block[6]];
349 pixels[7] = cm[pixels[7] + block[7]];
350 pixels += line_size;
351 block += 8;
de6d9b64
FB
352 }
353}
59fe111e
MN
354#if 0
355
356#define PIXOP2(OPNAME, OP) \
b3184779 357static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
358{\
359 int i;\
360 for(i=0; i<h; i++){\
361 OP(*((uint64_t*)block), LD64(pixels));\
362 pixels+=line_size;\
363 block +=line_size;\
364 }\
365}\
366\
45553457 367static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
368{\
369 int i;\
370 for(i=0; i<h; i++){\
371 const uint64_t a= LD64(pixels );\
372 const uint64_t b= LD64(pixels+1);\
373 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
374 pixels+=line_size;\
375 block +=line_size;\
376 }\
377}\
378\
45553457 379static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
380{\
381 int i;\
382 for(i=0; i<h; i++){\
383 const uint64_t a= LD64(pixels );\
384 const uint64_t b= LD64(pixels+1);\
385 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
386 pixels+=line_size;\
387 block +=line_size;\
388 }\
389}\
390\
45553457 391static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
392{\
393 int i;\
394 for(i=0; i<h; i++){\
395 const uint64_t a= LD64(pixels );\
396 const uint64_t b= LD64(pixels+line_size);\
397 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
398 pixels+=line_size;\
399 block +=line_size;\
400 }\
401}\
402\
45553457 403static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
404{\
405 int i;\
406 for(i=0; i<h; i++){\
407 const uint64_t a= LD64(pixels );\
408 const uint64_t b= LD64(pixels+line_size);\
409 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
410 pixels+=line_size;\
411 block +=line_size;\
412 }\
413}\
414\
45553457 415static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
416{\
417 int i;\
418 const uint64_t a= LD64(pixels );\
419 const uint64_t b= LD64(pixels+1);\
420 uint64_t l0= (a&0x0303030303030303ULL)\
421 + (b&0x0303030303030303ULL)\
422 + 0x0202020202020202ULL;\
423 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
424 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
425 uint64_t l1,h1;\
426\
427 pixels+=line_size;\
428 for(i=0; i<h; i+=2){\
429 uint64_t a= LD64(pixels );\
430 uint64_t b= LD64(pixels+1);\
431 l1= (a&0x0303030303030303ULL)\
432 + (b&0x0303030303030303ULL);\
433 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
434 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
435 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
436 pixels+=line_size;\
437 block +=line_size;\
438 a= LD64(pixels );\
439 b= LD64(pixels+1);\
440 l0= (a&0x0303030303030303ULL)\
441 + (b&0x0303030303030303ULL)\
442 + 0x0202020202020202ULL;\
443 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
444 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
445 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
446 pixels+=line_size;\
447 block +=line_size;\
448 }\
449}\
450\
45553457 451static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
452{\
453 int i;\
454 const uint64_t a= LD64(pixels );\
455 const uint64_t b= LD64(pixels+1);\
456 uint64_t l0= (a&0x0303030303030303ULL)\
457 + (b&0x0303030303030303ULL)\
458 + 0x0101010101010101ULL;\
459 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
460 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
461 uint64_t l1,h1;\
462\
463 pixels+=line_size;\
464 for(i=0; i<h; i+=2){\
465 uint64_t a= LD64(pixels );\
466 uint64_t b= LD64(pixels+1);\
467 l1= (a&0x0303030303030303ULL)\
468 + (b&0x0303030303030303ULL);\
469 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
470 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
471 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
472 pixels+=line_size;\
473 block +=line_size;\
474 a= LD64(pixels );\
475 b= LD64(pixels+1);\
476 l0= (a&0x0303030303030303ULL)\
477 + (b&0x0303030303030303ULL)\
478 + 0x0101010101010101ULL;\
479 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
480 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
481 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
482 pixels+=line_size;\
483 block +=line_size;\
484 }\
485}\
486\
45553457
ZK
487CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
488CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
489CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
490CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
491CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
492CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
493CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
494
495#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else /* 32 bit variant (the 64 bit variant above is compiled out by #if 0) */
497
/*
 * PIXOP2(OPNAME, OP) expands to the OPNAME_*pixels* family of block copy /
 * interpolation helpers, working on 16 and 32 bit words.
 * OP(dst, val) is the store operation supplied by the instantiation: every
 * result is written through it, so one macro body serves all variants.
 * LD16/LD32, rnd_avg32/no_rnd_avg32 and CALL_2X_PIXELS come from outside
 * this macro.
 * Naming: _x2 / _y2 / _xy2 blend with the right / lower / both neighbours,
 * _l2 / _l4 blend 2 / 4 independent sources, no_rnd uses the smaller
 * rounding constant (no_rnd_avg32, or 0x01 instead of 0x02 per byte).
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), LD16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD16(&src1[i*src_stride1  ]);\
        b= LD16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i, k;\
    for(i=0; i<h; i++){\
        for(k=0; k<8; k+=4){ /* left, then right group of 4 pixels */\
            const uint32_t a= LD32(&src1[i*src_stride1+k]);\
            const uint32_t b= LD32(&src2[i*src_stride2+k]);\
            const uint32_t c= LD32(&src3[i*src_stride3+k]);\
            const uint32_t d= LD32(&src4[i*src_stride4+k]);\
            /* low 2 bits and high 6 bits of each byte are summed separately */\
            const uint32_t l0= (a&0x03030303UL)\
                             + (b&0x03030303UL)\
                             + 0x02020202UL;\
            const uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                             + ((b&0xFCFCFCFCUL)>>2);\
            const uint32_t l1= (c&0x03030303UL)\
                             + (d&0x03030303UL);\
            const uint32_t h1= ((c&0xFCFCFCFCUL)>>2)\
                             + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride+k]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        }\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i, k;\
    for(i=0; i<h; i++){\
        for(k=0; k<8; k+=4){ /* left, then right group of 4 pixels */\
            const uint32_t a= LD32(&src1[i*src_stride1+k]);\
            const uint32_t b= LD32(&src2[i*src_stride2+k]);\
            const uint32_t c= LD32(&src3[i*src_stride3+k]);\
            const uint32_t d= LD32(&src4[i*src_stride4+k]);\
            const uint32_t l0= (a&0x03030303UL)\
                             + (b&0x03030303UL)\
                             + 0x01010101UL;\
            const uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                             + ((b&0xFCFCFCFCUL)>>2);\
            const uint32_t l1= (c&0x03030303UL)\
                             + (d&0x03030303UL);\
            const uint32_t h1= ((c&0xFCFCFCFCUL)>>2)\
                             + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride+k]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        }\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        /* stores go through OP like everywhere else in this macro, so */\
        /* an instance whose OP combines with dst is honoured here too */\
        OP(block[0], (a1+a0)>>2);\
        OP(block[1], (b1+b0)>>2);\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        OP(block[0], (a1+a0)>>2);\
        OP(block[1], (b1+b0)>>2);\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint32_t a= LD32(pixels  );\
    const uint32_t b= LD32(pixels+1);\
    uint32_t l0=  (a&0x03030303UL)\
               +  (b&0x03030303UL)\
               +  0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= LD32(pixels  );\
        uint32_t b= LD32(pixels+1);\
        l1=  (a&0x03030303UL)\
          +  (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD32(pixels  );\
        b= LD32(pixels+1);\
        l0=  (a&0x03030303UL)\
          +  (b&0x03030303UL)\
          +  0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){ /* left, then right 4 pixel wide column */\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                   +  (b&0x03030303UL)\
                   +  0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
              +  (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
              +  (b&0x03030303UL)\
              +  0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        /* rewind to the top row and step 4 pixels to the right */\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){ /* left, then right 4 pixel wide column */\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                   +  (b&0x03030303UL)\
                   +  0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
              +  (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
              +  (b&0x03030303UL)\
              +  0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        /* rewind to the top row and step 4 pixels to the right */\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)
/* Store operation for the avg_* functions: dst = rnd_avg32(dst, value). */
#define op_avg(a, b) (a) = rnd_avg32((a), (b))
59fe111e 865#endif
59fe111e
MN
866#define op_put(a, b) a = b
867
868PIXOP2(avg, op_avg)
869PIXOP2(put, op_put)
870#undef op_avg
871#undef op_put
872
/* Rounded average of two / four values.  Arguments are parenthesised so
 * that operands containing low-precedence operators (|, &, ?:, ...) are
 * averaged as written. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
875
073b013d 876
/**
 * Bilinear interpolation of an 8 pixel wide, h row tall block at a
 * sub-pixel offset given in 1/16 pixel units.
 * Each output is (A*p00 + B*p01 + C*p10 + D*p11 + rounder) >> 8, where the
 * four weights are products of (16-x16 | x16) and (16-y16 | y16) and
 * therefore sum to 256.
 * @param dst     destination, written 8 pixels per row
 * @param src     source; 9 pixels of h+1 rows are read
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal fraction
 * @param y16     vertical fraction
 * @param rounder value added before the >> 8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i, x;

    for(i=0; i<h; i++)
    {
        for(x=0; x<8; x++)
            dst[x]= (A*src[x] + B*src[x+1] + C*src[stride+x] + D*src[stride+x+1] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
899
/**
 * Motion compensation of an 8-pixel-wide block along an affine sampling
 * grid, with bilinear interpolation and clamping at the picture border.
 *
 * Sample coordinates are fixed-point: the low 16 bits are discarded, the
 * next `shift` bits are the sub-pixel fraction and the remaining bits are
 * the integer source position.
 *
 * @param dst     destination; pixel (x,y) is written to dst[y*stride + x]
 * @param src     source picture
 * @param stride  byte distance between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param ox,oy   sample coordinate of the first pixel of the first row
 * @param dxx,dyx coordinate step applied for each output column
 * @param dxy,dyy coordinate step applied for each output row
 * @param shift   number of sub-pixel fraction bits
 * @param r       rounding constant added before the >>(2*shift)
 * @param width   source width used for the horizontal range check/clamp
 * @param height  source height used for the vertical range check/clamp
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* from here on these are the last valid column / row index */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* the unsigned compare rejects negative positions as well as
               positions whose right/lower neighbour would be outside */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2x2 bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* row clamped: interpolate horizontally only */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* column clamped: interpolate vertically only */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both clamped: copy the nearest border pixel */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
957
/**
 * Third-pel "put", position (0,0): plain block copy.
 * Dispatches to the fixed-width copy routine matching width; any width
 * other than 2, 4, 8 or 16 is silently ignored.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
966
/**
 * Third-pel "put", 1/3 pel to the right:
 * dst = (2*cur + right + 1) / 3, with /3 done as *683 >> 11.
 * Reads width+1 source bytes per row.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
977
/**
 * Third-pel "put", 2/3 pel to the right:
 * dst = (cur + 2*right + 1) / 3, with /3 done as *683 >> 11.
 * Reads width+1 source bytes per row.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
988
/**
 * Third-pel "put", 1/3 pel down:
 * dst = (2*cur + below + 1) / 3, with /3 done as *683 >> 11.
 * Reads height+1 source rows.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
999
/**
 * Third-pel "put", 1/3 pel right and 1/3 pel down:
 * dst = (4*cur + 3*right + 3*below + 2*diag + 6) / 12,
 * with /12 done as *2731 >> 15.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1010
/**
 * Third-pel "put", 1/3 pel right and 2/3 pel down:
 * dst = (3*cur + 2*right + 4*below + 3*diag + 6) / 12,
 * with /12 done as *2731 >> 15.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1021
/**
 * Third-pel "put", 2/3 pel down:
 * dst = (cur + 2*below + 1) / 3, with /3 done as *683 >> 11.
 * Reads height+1 source rows.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
1032
/**
 * Third-pel "put", 2/3 pel right and 1/3 pel down:
 * dst = (3*cur + 4*right + 2*below + 3*diag + 6) / 12,
 * with /12 done as *2731 >> 15.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1043
/**
 * Third-pel "put", 2/3 pel right and 2/3 pel down:
 * dst = (2*cur + 3*right + 3*below + 4*diag + 6) / 12,
 * with /12 done as *2731 >> 15.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
da3b9756
MM
1054
/**
 * Third-pel "avg", position (0,0).
 * Dispatches to the fixed-width averaging routine matching width; any
 * width other than 2, 4, 8 or 16 is silently ignored.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
1063
/**
 * Third-pel "avg", 1/3 pel to the right.
 * Interpolates (2*cur + right + 1) / 3 (as *683 >> 11) and stores the
 * rounded average of that value and the existing dst pixel.
 * Reads width+1 source bytes per row.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1074
/**
 * Third-pel "avg", 2/3 pel to the right.
 * Interpolates (cur + 2*right + 1) / 3 (as *683 >> 11) and stores the
 * rounded average of that value and the existing dst pixel.
 * Reads width+1 source bytes per row.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1085
/**
 * Third-pel "avg", 1/3 pel down.
 * Interpolates (2*cur + below + 1) / 3 (as *683 >> 11) and stores the
 * rounded average of that value and the existing dst pixel.
 * Reads height+1 source rows.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1096
/**
 * Third-pel "avg", 1/3 pel right and 1/3 pel down.
 * Interpolates (4*cur + 3*right + 3*below + 2*diag + 6) / 12
 * (as *2731 >> 15) and stores the rounded average of that value and the
 * existing dst pixel.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1107
/**
 * Third-pel "avg", 1/3 pel right and 2/3 pel down.
 * Interpolates (3*cur + 2*right + 4*below + 3*diag + 6) / 12
 * (as *2731 >> 15) and stores the rounded average of that value and the
 * existing dst pixel.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1118
/**
 * Third-pel "avg", 2/3 pel down.
 * Interpolates (cur + 2*below + 1) / 3 (as *683 >> 11) and stores the
 * rounded average of that value and the existing dst pixel.
 * Reads height+1 source rows.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1129
/**
 * Third-pel "avg", 2/3 pel right and 1/3 pel down.
 * Interpolates (3*cur + 4*right + 2*below + 3*diag + 6) / 12
 * (as *2731 >> 15) and stores the rounded average of that value and the
 * existing dst pixel.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1140
/**
 * Third-pel "avg", 2/3 pel right and 2/3 pel down.
 * Interpolates (2*cur + 3*right + 3*below + 4*diag + 6) / 12
 * (as *2731 >> 15) and stores the rounded average of that value and the
 * existing dst pixel.
 * Reads width+1 source bytes per row, over height+1 rows.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
669ac79c
MN
#if 0
/*
 * Disabled: TPEL_WIDTH(width) would emit fixed-width wrappers around the
 * generic put_tpel_pixels_mcXY_c() routines, passing width through.
 * The stray "void" that preceded each call has been dropped so the macro
 * expands to valid C should it ever be enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1172
0da71265
MN
/*
 * H264_CHROMA_MC(OPNAME, OP) emits OPNAME##h264_chroma_mc{2,4,8}_c:
 * bilinear interpolation of a 2-, 4- or 8-pixel-wide block.
 *
 * x and y are the fractional offsets and must lie in 0..7 (asserted).
 * The weights A..D add up to 8*8 = 64; the generated functions hand the
 * unnormalised weighted sum to OP, which is responsible for rounding,
 * the >>6 normalisation and the store into dst.
 *
 * Each function reads width+1 source bytes per row over h+1 rows and
 * uses the same stride for dst and src.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
        dst+= stride;\
        src+= stride;\
    }\
}

/* b is the weighted sum scaled by 64: round, normalise, then store (put)
   or take the rounded average with the pixel already in a (avg). */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1243
/**
 * Copy a block 4 bytes wide and h rows high.
 * Each row is moved as one 32-bit word via LD32/ST32.
 *
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}
1254
/**
 * Copy a block 8 bytes wide and h rows high.
 * Each row is moved as two 32-bit words via LD32/ST32.
 *
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}
1266
/**
 * Copy a block 16 bytes wide and h rows high.
 * Each row is moved as four 32-bit words via LD32/ST32.
 *
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}
073b013d 1280
/**
 * Copy a block 17 bytes wide and h rows high.
 * Each row is moved as four 32-bit words via LD32/ST32 plus one
 * trailing byte.
 *
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
    }
}
1295
/**
 * Copy a block 9 bytes wide and h rows high.
 * Each row is moved as two 32-bit words via LD32/ST32 plus one
 * trailing byte.
 *
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst[8]= src[8];
        dst+=dstStride;
        src+=srcStride;
    }
}
1308
826f429a 1309
b3184779 1310#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda
ZK
1311static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1312 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1313 int i;\
1314 for(i=0; i<h; i++)\
1315 {\
1316 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1317 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1318 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1319 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1320 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1321 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1322 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1323 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1324 dst+=dstStride;\
1325 src+=srcStride;\
1326 }\
44eb4951
MN
1327}\
1328\
0c1a9eda 1329static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 1330 const int w=8;\
0c1a9eda 1331 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1332 int i;\
1333 for(i=0; i<w; i++)\
1334 {\
1335 const int src0= src[0*srcStride];\
1336 const int src1= src[1*srcStride];\
1337 const int src2= src[2*srcStride];\
1338 const int src3= src[3*srcStride];\
1339 const int src4= src[4*srcStride];\
1340 const int src5= src[5*srcStride];\
1341 const int src6= src[6*srcStride];\
1342 const int src7= src[7*srcStride];\
1343 const int src8= src[8*srcStride];\
1344 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1345 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1346 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1347 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1348 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1349 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1350 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1351 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1352 dst++;\
1353 src++;\
1354 }\
1355}\
1356\
0c1a9eda
ZK
1357static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1358 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1359 int i;\
826f429a 1360 \
b3184779
MN
1361 for(i=0; i<h; i++)\
1362 {\
1363 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1364 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1365 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1366 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1367 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1368 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1369 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1370 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1371 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1372 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1373 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1374 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1375 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1376 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1377 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1378 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1379 dst+=dstStride;\
1380 src+=srcStride;\
1381 }\
1382}\
1383\
0c1a9eda
ZK
1384static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1385 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1386 int i;\
826f429a 1387 const int w=16;\
b3184779
MN
1388 for(i=0; i<w; i++)\
1389 {\
1390 const int src0= src[0*srcStride];\
1391 const int src1= src[1*srcStride];\
1392 const int src2= src[2*srcStride];\
1393 const int src3= src[3*srcStride];\
1394 const int src4= src[4*srcStride];\
1395 const int src5= src[5*srcStride];\
1396 const int src6= src[6*srcStride];\
1397 const int src7= src[7*srcStride];\
1398 const int src8= src[8*srcStride];\
1399 const int src9= src[9*srcStride];\
1400 const int src10= src[10*srcStride];\
1401 const int src11= src[11*srcStride];\
1402 const int src12= src[12*srcStride];\
1403 const int src13= src[13*srcStride];\
1404 const int src14= src[14*srcStride];\
1405 const int src15= src[15*srcStride];\
1406 const int src16= src[16*srcStride];\
1407 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1408 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1409 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1410 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1411 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1412 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1413 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1414 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1415 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1416 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1417 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1418 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1419 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1420 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1421 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1422 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1423 dst++;\
1424 src++;\
1425 }\
1426}\
1427\
0c1a9eda 1428static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1429 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
1430}\
1431\
0c1a9eda
ZK
1432static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1433 uint8_t half[64];\
b3184779
MN
1434 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1435 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
1436}\
1437\
0c1a9eda 1438static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1439 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
1440}\
1441\
0c1a9eda
ZK
1442static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1443 uint8_t half[64];\
b3184779
MN
1444 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1445 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
1446}\
1447\
0c1a9eda
ZK
1448static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1449 uint8_t full[16*9];\
1450 uint8_t half[64];\
b3184779 1451 copy_block9(full, src, 16, stride, 9);\
db794953 1452 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1453 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
1454}\
1455\
0c1a9eda
ZK
1456static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1457 uint8_t full[16*9];\
b3184779 1458 copy_block9(full, src, 16, stride, 9);\
db794953 1459 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
1460}\
1461\
0c1a9eda
ZK
1462static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1463 uint8_t full[16*9];\
1464 uint8_t half[64];\
b3184779 1465 copy_block9(full, src, 16, stride, 9);\
db794953 1466 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1467 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 1468}\
0c1a9eda
ZK
1469void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1470 uint8_t full[16*9];\
1471 uint8_t halfH[72];\
1472 uint8_t halfV[64];\
1473 uint8_t halfHV[64];\
b3184779
MN
1474 copy_block9(full, src, 16, stride, 9);\
1475 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1476 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1477 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1478 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1479}\
0c1a9eda
ZK
1480static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1481 uint8_t full[16*9];\
1482 uint8_t halfH[72];\
1483 uint8_t halfHV[64];\
db794953
MN
1484 copy_block9(full, src, 16, stride, 9);\
1485 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1486 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1487 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1488 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1489}\
0c1a9eda
ZK
1490void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1491 uint8_t full[16*9];\
1492 uint8_t halfH[72];\
1493 uint8_t halfV[64];\
1494 uint8_t halfHV[64];\
b3184779
MN
1495 copy_block9(full, src, 16, stride, 9);\
1496 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1497 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1498 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1499 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1500}\
0c1a9eda
ZK
1501static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1502 uint8_t full[16*9];\
1503 uint8_t halfH[72];\
1504 uint8_t halfHV[64];\
db794953
MN
1505 copy_block9(full, src, 16, stride, 9);\
1506 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1507 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1508 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1509 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1510}\
0c1a9eda
ZK
1511void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1512 uint8_t full[16*9];\
1513 uint8_t halfH[72];\
1514 uint8_t halfV[64];\
1515 uint8_t halfHV[64];\
b3184779
MN
1516 copy_block9(full, src, 16, stride, 9);\
1517 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1518 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1519 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1520 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1521}\
0c1a9eda
ZK
1522static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1523 uint8_t full[16*9];\
1524 uint8_t halfH[72];\
1525 uint8_t halfHV[64];\
db794953
MN
1526 copy_block9(full, src, 16, stride, 9);\
1527 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1528 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1529 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1530 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1531}\
0c1a9eda
ZK
1532void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1533 uint8_t full[16*9];\
1534 uint8_t halfH[72];\
1535 uint8_t halfV[64];\
1536 uint8_t halfHV[64];\
b3184779
MN
1537 copy_block9(full, src, 16, stride, 9);\
1538 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1539 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1540 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1541 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1542}\
0c1a9eda
ZK
1543static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1544 uint8_t full[16*9];\
1545 uint8_t halfH[72];\
1546 uint8_t halfHV[64];\
db794953
MN
1547 copy_block9(full, src, 16, stride, 9);\
1548 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1549 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1550 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1551 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1552}\
0c1a9eda
ZK
1553static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1554 uint8_t halfH[72];\
1555 uint8_t halfHV[64];\
b3184779 1556 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1557 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1558 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1559}\
0c1a9eda
ZK
1560static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1561 uint8_t halfH[72];\
1562 uint8_t halfHV[64];\
b3184779 1563 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1564 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1565 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1566}\
0c1a9eda
ZK
1567void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1568 uint8_t full[16*9];\
1569 uint8_t halfH[72];\
1570 uint8_t halfV[64];\
1571 uint8_t halfHV[64];\
b3184779
MN
1572 copy_block9(full, src, 16, stride, 9);\
1573 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1574 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1575 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1576 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1577}\
0c1a9eda
ZK
1578static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1579 uint8_t full[16*9];\
1580 uint8_t halfH[72];\
db794953
MN
1581 copy_block9(full, src, 16, stride, 9);\
1582 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1583 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1584 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1585}\
0c1a9eda
ZK
1586void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1587 uint8_t full[16*9];\
1588 uint8_t halfH[72];\
1589 uint8_t halfV[64];\
1590 uint8_t halfHV[64];\
b3184779
MN
1591 copy_block9(full, src, 16, stride, 9);\
1592 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1593 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1594 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1595 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1596}\
0c1a9eda
ZK
1597static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1598 uint8_t full[16*9];\
1599 uint8_t halfH[72];\
db794953
MN
1600 copy_block9(full, src, 16, stride, 9);\
1601 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1602 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1603 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1604}\
0c1a9eda
ZK
1605static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1606 uint8_t halfH[72];\
b3184779 1607 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1608 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1609}\
0c1a9eda 1610static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1611 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1612}\
1613\
0c1a9eda
ZK
1614static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1615 uint8_t half[256];\
b3184779
MN
1616 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1617 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1618}\
1619\
0c1a9eda 1620static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1621 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1622}\
b3184779 1623\
0c1a9eda
ZK
1624static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1625 uint8_t half[256];\
b3184779
MN
1626 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1627 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1628}\
1629\
0c1a9eda
ZK
1630static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1631 uint8_t full[24*17];\
1632 uint8_t half[256];\
b3184779 1633 copy_block17(full, src, 24, stride, 17);\
826f429a 1634 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1635 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1636}\
1637\
0c1a9eda
ZK
1638static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1639 uint8_t full[24*17];\
b3184779 1640 copy_block17(full, src, 24, stride, 17);\
826f429a 1641 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1642}\
1643\
0c1a9eda
ZK
1644static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1645 uint8_t full[24*17];\
1646 uint8_t half[256];\
b3184779 1647 copy_block17(full, src, 24, stride, 17);\
826f429a 1648 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1649 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1650}\
0c1a9eda
ZK
1651void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1652 uint8_t full[24*17];\
1653 uint8_t halfH[272];\
1654 uint8_t halfV[256];\
1655 uint8_t halfHV[256];\
b3184779
MN
1656 copy_block17(full, src, 24, stride, 17);\
1657 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1658 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1659 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1660 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1661}\
0c1a9eda
ZK
1662static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1663 uint8_t full[24*17];\
1664 uint8_t halfH[272];\
1665 uint8_t halfHV[256];\
db794953
MN
1666 copy_block17(full, src, 24, stride, 17);\
1667 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1668 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1669 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1670 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1671}\
0c1a9eda
ZK
1672void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1673 uint8_t full[24*17];\
1674 uint8_t halfH[272];\
1675 uint8_t halfV[256];\
1676 uint8_t halfHV[256];\
b3184779
MN
1677 copy_block17(full, src, 24, stride, 17);\
1678 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1679 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1680 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1681 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1682}\
0c1a9eda
ZK
1683static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1684 uint8_t full[24*17];\
1685 uint8_t halfH[272];\
1686 uint8_t halfHV[256];\
db794953
MN
1687 copy_block17(full, src, 24, stride, 17);\
1688 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1689 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1690 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1691 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1692}\
0c1a9eda
ZK
1693void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1694 uint8_t full[24*17];\
1695 uint8_t halfH[272];\
1696 uint8_t halfV[256];\
1697 uint8_t halfHV[256];\
b3184779
MN
1698 copy_block17(full, src, 24, stride, 17);\
1699 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1700 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1701 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1702 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1703}\
0c1a9eda
ZK
1704static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1705 uint8_t full[24*17];\
1706 uint8_t halfH[272];\
1707 uint8_t halfHV[256];\
db794953
MN
1708 copy_block17(full, src, 24, stride, 17);\
1709 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1710 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1711 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1712 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1713}\
0c1a9eda
ZK
1714void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1715 uint8_t full[24*17];\
1716 uint8_t halfH[272];\
1717 uint8_t halfV[256];\
1718 uint8_t halfHV[256];\
b3184779
MN
1719 copy_block17(full, src, 24, stride, 17);\
1720 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1721 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1722 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1723 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1724}\
0c1a9eda
ZK
1725static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1726 uint8_t full[24*17];\
1727 uint8_t halfH[272];\
1728 uint8_t halfHV[256];\
db794953
MN
1729 copy_block17(full, src, 24, stride, 17);\
1730 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1731 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1732 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1733 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1734}\
0c1a9eda
ZK
1735static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1736 uint8_t halfH[272];\
1737 uint8_t halfHV[256];\
b3184779 1738 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1739 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1740 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1741}\
0c1a9eda
ZK
1742static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1743 uint8_t halfH[272];\
1744 uint8_t halfHV[256];\
b3184779 1745 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1746 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1747 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1748}\
0c1a9eda
ZK
1749void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1750 uint8_t full[24*17];\
1751 uint8_t halfH[272];\
1752 uint8_t halfV[256];\
1753 uint8_t halfHV[256];\
b3184779
MN
1754 copy_block17(full, src, 24, stride, 17);\
1755 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1756 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1757 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1758 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1759}\
0c1a9eda
ZK
1760static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1761 uint8_t full[24*17];\
1762 uint8_t halfH[272];\
db794953
MN
1763 copy_block17(full, src, 24, stride, 17);\
1764 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1765 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1766 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1767}\
0c1a9eda
ZK
1768void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1769 uint8_t full[24*17];\
1770 uint8_t halfH[272];\
1771 uint8_t halfV[256];\
1772 uint8_t halfHV[256];\
b3184779
MN
1773 copy_block17(full, src, 24, stride, 17);\
1774 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1775 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1776 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1777 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1778}\
0c1a9eda
ZK
1779static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1780 uint8_t full[24*17];\
1781 uint8_t halfH[272];\
db794953
MN
1782 copy_block17(full, src, 24, stride, 17);\
1783 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1784 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1785 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1786}\
0c1a9eda
ZK
1787static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1788 uint8_t halfH[272];\
b3184779 1789 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1790 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1791}
44eb4951 1792
b3184779
MN
1793#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1794#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1795#define op_put(a, b) a = cm[((b) + 16)>>5]
1796#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1797
1798QPEL_MC(0, put_ , _ , op_put)
1799QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1800QPEL_MC(0, avg_ , _ , op_avg)
1801//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1802#undef op_avg
1803#undef op_avg_no_rnd
1804#undef op_put
1805#undef op_put_no_rnd
44eb4951 1806
0da71265
MN
1807#if 1
/**
 * Generate the C half-sample interpolation kernels for H.264 luma blocks.
 *
 * All kernels apply the 6-tap filter (1, -5, 20, 20, -5, 1) to neighbouring
 * samples. One expansion defines, for the given name prefix:
 *   - h264_qpel{4,8}_h_lowpass : filter along each row
 *   - h264_qpel{4,8}_v_lowpass : filter along each column
 *   - h264_qpel{4,8}_hv_lowpass: filter rows into the int16_t scratch buffer
 *                                `tmp`, then filter that buffer along columns
 *   - h264_qpel16_*            : the same, built from four 8x8 calls
 *
 * The h/v kernels read 2 samples before and 3 samples after the block along
 * the filtered direction; the hv kernels read that margin in both directions.
 * For the hv kernels `tmp` must hold (size + 5) rows of `tmpStride` elements.
 *
 * @param OPNAME prefix pasted onto every generated function name
 * @param OP     store operation for a single-pass filter sum; invoked as
 *               OP(dst_lvalue, sum) with `cm` (cropTbl + MAX_NEG_CROP) in scope
 * @param OP2    store operation for the two-pass (hv) filter sum, same calling
 *               convention as OP
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        /* the whole column is loaded before any output is stored */\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    /* first pass: h+5 rows, starting two rows above the block, unscaled */\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    /* rewind tmp to the row that corresponds to the first output row */\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        /* the whole column is loaded before any output is stored */\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    /* first pass: h+5 rows, starting two rows above the block, unscaled */\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    /* rewind tmp to the row that corresponds to the first output row */\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
/* 16x16 variants: left and right 8x8 halves, then the same 8 rows further down */\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    tmp += 8*tmpStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}

/**
 * Generate the sixteen H.264 quarter-sample motion-compensation entry points
 * OPNAME h264_qpel<SIZE>_mcXY_c(dst, src, stride) for one block size.
 *
 * Judging by the bodies, X selects the horizontal and Y the vertical
 * quarter-sample position (0..3):
 *   - mc00 is a plain block copy/average via OPNAME pixels<SIZE>_c;
 *   - mc20 / mc02 / mc22 write the h, v and hv lowpass output directly;
 *   - every other position combines two intermediate SIZExSIZE planes (or one
 *     plane and the source) with OPNAME pixels<SIZE>_l2.
 *
 * Intermediate planes are always produced with the put_ kernels; only the
 * final store uses OPNAME. `full` holds SIZE+5 source rows (two above, three
 * below the block) with a row pitch of SIZE, and `full_mid` points at the row
 * that corresponds to the block's first row.
 *
 * Relies on copy_block<SIZE>, <OPNAME>pixels<SIZE>_c, <OPNAME>pixels<SIZE>_l2
 * and the H264_LOWPASS-generated kernels being defined elsewhere in the file.
 *
 * @param OPNAME prefix of the generated functions and of the final store helpers
 * @param SIZE   block width and height in pixels
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}

2148#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2149//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2150#define op_put(a, b) a = cm[((b) + 16)>>5]
2151#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2152#define op2_put(a, b) a = cm[((b) + 512)>>10]
2153
2154H264_LOWPASS(put_ , op_put, op2_put)
2155H264_LOWPASS(avg_ , op_avg, op2_avg)
2156H264_MC(put_, 4)
2157H264_MC(put_, 8)
2158H264_MC(put_, 16)
2159H264_MC(avg_, 4)
2160H264_MC(avg_, 8)
2161H264_MC(avg_, 16)
2162
2163#undef op_avg
2164#undef op_put
2165#undef op2_avg
2166#undef op2_put
2167#endif
2168
1457ab52
MN
2169static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2170 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2171 int i;
2172
2173 for(i=0; i<h; i++){
2174 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2175 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2176 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2177 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2178 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2179 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2180 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2181 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2182 dst+=dstStride;
2183 src+=srcStride;
2184 }
2185}
2186
2187static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2188 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2189 int i;
2190
2191 for(i=0; i<w; i++){
2192 const int src_1= src[ -srcStride];
2193 const int src0 = src[0 ];
2194 const int src1 = src[ srcStride];
2195 const int src2 = src[2*srcStride];
2196 const int src3 = src[3*srcStride];
2197 const int src4 = src[4*srcStride];
2198 const int src5 = src[5*srcStride];
2199 const int src6 = src[6*srcStride];
2200 const int src7 = src[7*srcStride];
2201 const int src8 = src[8*srcStride];
2202 const int src9 = src[9*srcStride];
2203 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2204 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2205 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2206 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2207 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2208 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2209 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2210 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2211 src++;
2212 dst++;
2213 }
2214}
2215
/** mspel position (0,0): no interpolation, hand the 8x8 block to put_pixels8_c. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
2219
/**
 * mspel position (1,0): combine the source block with its horizontally
 * filtered version through put_pixels8_l2 (defined elsewhere; presumably a
 * two-source average -- confirm against its definition).
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];   /* 8x8 horizontally filtered plane, pitch 8 */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
2225
/** mspel position (2,0): write the horizontally filtered 8x8 block straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2229
/**
 * mspel position (3,0): like position (1,0), but the unfiltered input to
 * put_pixels8_l2 is the block shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];   /* 8x8 horizontally filtered plane, pitch 8 */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}
2235
/** mspel position (0,2): write the vertically filtered 8x8 block straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2239
/**
 * mspel position (1,2): combine the vertically filtered block with the
 * horizontally-then-vertically filtered block through put_pixels8_l2.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows of 8: source rows -1 .. 9, horizontally filtered */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    /* halfH+8 skips the extra top row so the vertical filter can reach row -1 */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel position (3,2): like position (1,2), but the purely vertical plane is
 * computed from the block shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows of 8: source rows -1 .. 9, horizontally filtered */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    /* halfH+8 skips the extra top row so the vertical filter can reach row -1 */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel position (2,2): horizontal filter into a scratch plane, then vertical
 * filter of that plane straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows of 8: source rows -1 .. 9, horizontally filtered */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    /* halfH+8 skips the extra top row so the vertical filter can reach row -1 */
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2263
332f9ac4
MN
2264static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
2265 int x;
2266 const int strength= ff_h263_loop_filter_strength[qscale];
2267
2268 for(x=0; x<8; x++){
2269 int d1, d2, ad1;
2270 int p0= src[x-2*stride];
2271 int p1= src[x-1*stride];
2272 int p2= src[x+0*stride];
2273 int p3= src[x+1*stride];
2274 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2275
2276 if (d<-2*strength) d1= 0;
2277 else if(d<- strength) d1=-2*strength - d;
2278 else if(d< strength) d1= d;
2279 else if(d< 2*strength) d1= 2*strength - d;
2280 else d1= 0;
2281
2282 p1 += d1;
2283 p2 -= d1;
2284 if(p1&256) p1= ~(p1>>31);
2285 if(p2&256) p2= ~(p2>>31);
2286
2287 src[x-1*stride] = p1;
2288 src[x+0*stride] = p2;
2289
5b5404e3 2290 ad1= ABS(d1)>>1;
332f9ac4
MN
2291
2292 d2= clip((p0-p3)/4, -ad1, ad1);
2293
2294 src[x-2*stride] = p0 - d2;
2295 src[x+ stride] = p3 + d2;
2296 }
2297}
2298
2299static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
2300 int y;
2301 const int strength= ff_h263_loop_filter_strength[qscale];
2302
2303 for(y=0; y<8; y++){
2304 int d1, d2, ad1;
2305 int p0= src[y*stride-2];
2306 int p1= src[y*stride-1];
2307 int p2= src[y*stride+0];
2308 int p3= src[y*stride+1];
2309 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2310
2311 if (d<-2*strength) d1= 0;
2312 else if(d<- strength) d1=-2*strength - d;
2313 else if(d< strength) d1= d;
2314 else if(d< 2*strength) d1= 2*strength - d;
2315 else d1= 0;
2316
2317 p1 += d1;
2318 p2 -= d1;
2319 if(p1&256) p1= ~(p1>>31);
2320 if(p2&256) p2= ~(p2>>31);
2321
2322 src[y*stride-1] = p1;
2323 src[y*stride+0] = p2;
2324
2325 ad1= ABS(d1)>>1;
2326
2327 d2= clip((p0-p3)/4, -ad1, ad1);
2328
2329 src[y*stride-2] = p0 - d2;
2330 src[y*stride+1] = p3 + d2;
2331 }
2332}
1457ab52 2333
/**
 * Sum of absolute differences between two 16-pixel-wide blocks.
 *
 * @param v         unused by this implementation
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows to compare
 * @return sum over h rows of |pix1[k] - pix2[k]| for k = 0..15
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
2361
/**
 * Sum of absolute differences between a 16-pixel-wide block and a reference
 * block interpolated between horizontally adjacent pixels.
 *
 * Each reference sample is avg2(pix2[k], pix2[k+1]) (avg2 is defined elsewhere
 * in this file), so 17 pixels are read from every pix2 row.
 *
 * @param v         unused by this implementation
 * @param pix1      block to compare
 * @param pix2      reference block; pix2[0..16] is read per row
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows to compare
 * @return accumulated absolute difference
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
2389
/**
 * Sum of absolute differences between a 16 pixel wide area and the
 * vertical two-tap average (avg2) of a reference area.
 * Reads h+1 rows of pix2.
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area, each sample is avg2() of a pixel and the one a row below
 * @param line_size offset in bytes from one row to the next (used for both areas)
 * @param h         number of rows to compare
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        uint8_t *below = pix2 + line_size; /* reference row underneath the current one */

        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2419
/**
 * Sum of absolute differences between a 16 pixel wide area and the
 * four-tap average (avg4) of a reference area.
 * Reads 17 columns and h+1 rows of pix2.
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area, each sample is avg4() of the 2x2 neighbourhood
 *                  starting at the current position
 * @param line_size offset in bytes from one row to the next (used for both areas)
 * @param h         number of rows to compare
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        uint8_t *below = pix2 + line_size; /* reference row underneath the current one */

        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2449
/**
 * Sum of absolute differences over an 8 pixel wide area.
 * @param v         unused
 * @param pix1      first area
 * @param pix2      second area
 * @param line_size offset in bytes from one row to the next (used for both areas)
 * @param h         number of rows to compare
 * @return sum of |pix1[x] - pix2[x]| over 8 columns and h rows
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2469
/**
 * Sum of absolute differences between an 8 pixel wide area and the
 * horizontal two-tap average (avg2) of a reference area.
 * Reads 9 columns of pix2 per row.
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area, each sample is avg2() of pix2[x] and pix2[x+1]
 * @param line_size offset in bytes from one row to the next (used for both areas)
 * @param h         number of rows to compare
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2489
/**
 * Sum of absolute differences between an 8 pixel wide area and the
 * vertical two-tap average (avg2) of a reference area.
 * Reads h+1 rows of pix2.
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area, each sample is avg2() of a pixel and the one a row below
 * @param line_size offset in bytes from one row to the next (used for both areas)
 * @param h         number of rows to compare
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        uint8_t *below = pix2 + line_size; /* reference row underneath the current one */

        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2511
/**
 * Sum of absolute differences between an 8 pixel wide area and the
 * four-tap average (avg4) of a reference area.
 * Reads 9 columns and h+1 rows of pix2.
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area, each sample is avg4() of the 2x2 neighbourhood
 *                  starting at the current position
 * @param line_size offset in bytes from one row to the next (used for both areas)
 * @param h         number of rows to compare
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        uint8_t *below = pix2 + line_size; /* reference row underneath the current one */

        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2533
a9badb51
MN
2534/**
2535 * permutes an 8x8 block.
2a5700de 2536 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
2537 * @param permutation the permutation vector
2538 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
2a5700de
MN
2539 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2540 * (inverse) permutated to scantable order!
a9badb51 2541 */
0c1a9eda 2542void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 2543{
7801d21d 2544 int i;
477ab036 2545 DCTELEM temp[64];
7801d21d
MN
2546
2547 if(last<=0) return;
9a7b310d 2548 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 2549
7801d21d
MN
2550 for(i=0; i<=last; i++){
2551 const int j= scantable[i];
2552 temp[j]= block[j];
2553 block[j]=0;
2554 }
2555
2556 for(i=0; i<=last; i++){
2557 const int j= scantable[i];
2558 const int perm_j= permutation[j];
2559 block[perm_j]= temp[j];
2560 }
d962f6fd 2561}
e0eac44e 2562
2a5700de
MN
2563/**
2564 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
2565 */
eb4b3dd3 2566static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
2567{
2568 memset(blocks, 0, sizeof(DCTELEM)*6*64);
2569}
2570
11f18faf
MN
/**
 * Adds src to dst bytewise: dst[n] += src[n] for n in [0, w).
 * The sums wrap modulo 256 because they are stored back into uint8_t.
 * @param dst buffer that is updated in place
 * @param src bytes to add
 * @param w   number of bytes to process
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] += src[n];
}
2586
/**
 * Bytewise difference: dst[n] = src1[n] - src2[n] for n in [0, w).
 * The differences wrap modulo 256 because they are stored into uint8_t.
 * @param dst  output buffer
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes to process
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = src1[n] - src2[n];
}
2602
84705403
MN
/**
 * Subtracts a median prediction from a line of bytes.
 * For every position the prediction is mid_pred() of the left sample, the
 * sample from src1 and (left + src1 - left_top) & 0xFF; the byte stored in
 * dst is the src2 sample minus that prediction.
 * @param dst      output residuals
 * @param src1     line supplying the "top" samples
 * @param src2     line being predicted
 * @param w        number of bytes to process
 * @param left     in: left neighbour of the first sample; out: last sample of src2 processed
 * @param left_top in: top-left neighbour of the first sample; out: last sample of src1 processed
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur_left     = *left;
    uint8_t cur_left_top = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const int top      = src1[x];
        const int gradient = (cur_left + top - cur_left_top) & 0xFF;
        const int pred     = mid_pred(cur_left, top, gradient);

        cur_left_top = top;
        cur_left     = src2[x];
        dst[x]       = cur_left - pred;
    }

    *left     = cur_left;
    *left_top = cur_left_top;
}
2620
1457ab52
MN
/**
 * o1 = i1 + i2, o2 = i1 - i2.
 * i1 and i2 are evaluated twice and o1 is written before o2 is computed,
 * so the inputs must be side-effect free and must not alias o1.
 * Expands to a single statement; terminate each use with a semicolon.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do{\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
}while(0)

/**
 * In-place butterfly: x becomes x+y and y becomes x-y (both from the old values).
 * The temporaries carry a prefix so that they cannot shadow the caller's variables.
 * Expands to a single statement; terminate each use with a semicolon.
 */
#define BUTTERFLY1(x,y) \
do{\
    const int butterfly_a= (x);\
    const int butterfly_b= (y);\
    (x)= butterfly_a+butterfly_b;\
    (y)= butterfly_a-butterfly_b;\
}while(0)

/** |x+y| + |x-y|, i.e. a final butterfly stage fused with the absolute sum. */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
2635
/**
 * Transform-domain difference of two 8x8 blocks.
 * The pixel differences src - dst go through three butterfly stages along
 * every row and three along every column (the last one fused into
 * BUTTERFLYA); the absolute values of the results are summed.
 * @param s      unused (typed void*, meant to be an MpegEncContext)
 * @param dst    first block
 * @param src    second block
 * @param stride offset in bytes from one row to the next (used for both blocks)
 * @param h      must be 8
 * @return sum of the absolute transformed differences
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum=0;
    int i;

    assert(h==8);

    /* horizontal pass: one row of differences per iteration */
    for(i=0; i<8; i++){
        int * const row= temp + 8*i;
        const uint8_t * const src_row= src + stride*i;
        const uint8_t * const dst_row= dst + stride*i;

        BUTTERFLY2(row[0], row[1], src_row[0]-dst_row[0], src_row[1]-dst_row[1]);
        BUTTERFLY2(row[2], row[3], src_row[2]-dst_row[2], src_row[3]-dst_row[3]);
        BUTTERFLY2(row[4], row[5], src_row[4]-dst_row[4], src_row[5]-dst_row[5]);
        BUTTERFLY2(row[6], row[7], src_row[6]-dst_row[6], src_row[7]-dst_row[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: one column per iteration, col[8*k] is row k of that column */
    for(i=0; i<8; i++){
        int * const col= temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum +=
             BUTTERFLYA(col[8*0], col[8*4])
            +BUTTERFLYA(col[8*1], col[8*5])
            +BUTTERFLYA(col[8*2], col[8*6])
            +BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}
2687
/**
 * Transform-domain activity of one 8x8 block.
 * Every pixel has mean subtracted, the result goes through three butterfly
 * stages along every row and three along every column (the last one fused
 * into BUTTERFLYA), and the absolute values of the results are summed.
 * @param src    block to measure
 * @param stride offset in bytes from one row to the next
 * @param mean   value subtracted from every pixel before the transform
 * @return sum of the absolute transformed values
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int temp[64];
    int sum=0;
    int i;

    //FIXME OOOPS ignore 0 term instead of mean mess
    /* horizontal pass: one row per iteration */
    for(i=0; i<8; i++){
        int * const row= temp + 8*i;
        const uint8_t * const pix= src + stride*i;

        //FIXME try pointer walks
        BUTTERFLY2(row[0], row[1], pix[0]-mean, pix[1]-mean);
        BUTTERFLY2(row[2], row[3], pix[2]-mean, pix[3]-mean);
        BUTTERFLY2(row[4], row[5], pix[4]-mean, pix[5]-mean);
        BUTTERFLY2(row[6], row[7], pix[6]-mean, pix[7]-mean);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: one column per iteration, col[8*k] is row k of that column */
    for(i=0; i<8; i++){
        int * const col= temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum +=
             BUTTERFLYA(col[8*0], col[8*4])
            +BUTTERFLYA(col[8*1], col[8*5])
            +BUTTERFLYA(col[8*2], col[8*6])
            +BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}
2731
bb198e19 2732static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2733 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
2734 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2735 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1457ab52 2736 int sum=0, i;
bb198e19
MN
2737
2738 assert(h==8);
1457ab52
MN
2739
2740 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 2741 s->dsp.fdct(temp);
1457ab52
MN
2742
2743 for(i=0; i<64; i++)
2744 sum+= ABS(temp[i]);
2745
2746 return sum;
2747}
2748
0e15384d 2749void simple_idct(DCTELEM *block); //FIXME
1457ab52 2750
bb198e19 2751static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2752 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
2753 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
2754 DCTELEM * const temp= (DCTELEM*)aligned_temp;
2755 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
1457ab52
MN
2756 int sum=0, i;
2757
bb198e19 2758 assert(h==8);
1457ab52
MN
2759 s->mb_intra=0;
2760
2761 s->dsp.diff_pixels(temp, src1, src2, stride);
2762
2763 memcpy(bak, temp, 64*sizeof(DCTELEM));
2764
67725183 2765 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
d50635cd 2766 s->dct_unquantize_inter(s, temp, 0, s->qscale);
1457ab52
MN
2767 simple_idct(temp); //FIXME
2768
2769 for(i=0; i<64; i++)
2770 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2771
2772 return sum;
2773}
2774
bb198e19 2775static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2776 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2777 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
2778 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2779 uint64_t __align8 aligned_bak[stride];
2780 DCTELEM * const temp= (DCTELEM*)aligned_temp;
2781 uint8_t * const bak= (uint8_t*)aligned_bak;
3a87ac94
MN
2782 int i, last, run, bits, level, distoration, start_i;
2783 const int esc_length= s->ac_esc_length;
2784 uint8_t * length;
2785 uint8_t * last_length;
67725183 2786
bb198e19
MN
2787 assert(h==8);
2788
67725183
MN
2789 for(i=0; i<8; i++){
2790 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
2791 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
2792 }
3a87ac94 2793
67725183
MN
2794 s->dsp.diff_pixels(temp, src1, src2, stride);
2795
2796 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2797
2798 bits=0;
3a87ac94
MN
2799
2800 if (s->mb_intra) {
67725183 2801 start_i = 1;
3a87ac94
MN
2802 length = s->intra_ac_vlc_length;
2803 last_length= s->intra_ac_vlc_last_length;
67725183 2804 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2805 } else {
2806 start_i = 0;
2807 length = s->inter_ac_vlc_length;
2808 last_length= s->inter_ac_vlc_last_length;
2809 }
3a87ac94 2810
67725183 2811 if(last>=start_i){
3a87ac94
MN
2812 run=0;
2813 for(i=start_i; i<last; i++){
2814 int j= scantable[i];
2815 level= temp[j];
2816
2817 if(level){
2818 level+=64;
2819 if((level&(~127)) == 0){
2820 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2821 }else
2822 bits+= esc_length;
2823 run=0;
2824 }else
2825 run++;
2826 }
2827 i= scantable[last];
1d0eab1d 2828
3a87ac94 2829 level= temp[i] + 64;
1d0eab1d
MN
2830
2831 assert(level - 64);
2832
3a87ac94
MN
2833 if((level&(~127)) == 0){
2834 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2835 }else
2836 bits+= esc_length;
2837
67725183
MN
2838 }
2839
2840 if(last>=0){
d50635cd
MN
2841 if(s->mb_intra)
2842 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2843 else
2844 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3a87ac94
MN
2845 }
2846
b0368839 2847 s->dsp.idct_add(bak, stride, temp);
3a87ac94 2848
bb198e19 2849 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
3a87ac94 2850
67725183 2851 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
2852}
2853
bb198e19 2854static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2855 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2856 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
2857 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
2858 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3a87ac94
MN
2859 int i, last, run, bits, level, start_i;
2860 const int esc_length= s->ac_esc_length;
2861 uint8_t * length;
2862 uint8_t * last_length;
bb198e19
MN
2863
2864 assert(h==8);
67725183
MN
2865
2866 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 2867
67725183
MN
2868 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2869
2870 bits=0;
3a87ac94
MN
2871
2872 if (s->mb_intra) {
67725183 2873 start_i = 1;
3a87ac94
MN
2874 length = s->intra_ac_vlc_length;
2875 last_length= s->intra_ac_vlc_last_length;
67725183 2876 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2877 } else {
2878 start_i = 0;
2879 length = s->inter_ac_vlc_length;
2880 last_length= s->inter_ac_vlc_last_length;
2881 }
3a87ac94 2882
67725183 2883 if(last>=start_i){
3a87ac94
MN
2884 run=0;
2885 for(i=start_i; i<last; i++){
2886 int j= scantable[i];
2887 level= temp[j];
2888
2889 if(level){
2890 level+=64;
2891 if((level&(~127)) == 0){
2892 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2893 }else
2894 bits+= esc_length;
2895 run=0;
2896 }else
2897 run++;
2898 }
2899 i= scantable[last];
67725183
MN
2900
2901 level= temp[i] + 64;
3a87ac94 2902
67725183 2903 assert(level - 64);
3a87ac94 2904
3a87ac94
MN
2905 if((level&(~127)) == 0){
2906 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2907 }else
2908 bits+= esc_length;
2909 }
2910
2911 return bits;
2912}
2913
bb198e19
MN
/* Instantiate the *16_c variants of the 8x8 comparison functions above.
 * The second argument is the name of the generated function; these are the
 * names SET_CMP_FUNC() installs in slot [0] in dsputil_init().
 * NOTE(review): WARPER8_16_SQ is defined outside this section; presumably it
 * applies the 8x8 function to the four quadrants of a 16x16 area and sums the
 * results - confirm against the macro definition. */
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WARPER8_16_SQ(rd8x8_c, rd16_c)
WARPER8_16_SQ(bit8x8_c, bit16_c)
1457ab52 2919
b0368839
MN
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
/**
 * idct_put wrapper around j_rev_dct.
 * Runs j_rev_dct() on block in place, then hands the result to
 * put_pixels_clamped_c() together with dest and line_size.
 * @param dest      destination pixels
 * @param line_size forwarded unchanged to put_pixels_clamped_c
 * @param block     coefficient block, modified in place
 */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/**
 * idct_add wrapper around j_rev_dct.
 * Runs j_rev_dct() on block in place, then hands the result to
 * add_pixels_clamped_c() together with dest and line_size.
 * @param dest      destination pixels
 * @param line_size forwarded unchanged to add_pixels_clamped_c
 * @param block     coefficient block, modified in place
 */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
2932
59cf08ce
FB
2933/* init static data */
2934void dsputil_static_init(void)
e0eac44e 2935{
d2975f8d 2936 int i;
e0eac44e 2937
59cf08ce
FB
2938 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
2939 for(i=0;i<MAX_NEG_CROP;i++) {
2940 cropTbl[i] = 0;
2941 cropTbl[i + MAX_NEG_CROP + 256] = 255;
2942 }
2943
2944 for(i=0;i<512;i++) {
2945 squareTbl[i] = (i - 256) * (i - 256);
2946 }
2947
2948 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2949}
92ddb692 2950
92ddb692 2951
59cf08ce
FB
/**
 * Fills a DSPContext with the C implementations of all DSP functions, lets
 * the platform-specific initializers that are compiled in run on it, and
 * finally builds c->idct_permutation from c->idct_permutation_type.
 * @param c     context to fill
 * @param avctx codec context; dct_algo and idct_algo select the (I)DCT
 *              implementation, and it is passed on to the platform
 *              initializers and to av_log()
 */
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    /* forward DCT, only needed when encoders are compiled in */
#ifdef CONFIG_ENCODERS
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT; each choice also fixes the coefficient permutation it expects */
    if(avctx->idct_algo==FF_IDCT_INT){
        c->idct_put= ff_jref_idct_put;
        c->idct_add= ff_jref_idct_add;
        c->idct = j_rev_dct;
        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
    }else{ //accurate/default
        c->idct_put= simple_idct_put;
        c->idct_add= simple_idct_add;
        c->idct = simple_idct;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }

    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* TODO [0] 16 [1] 8 */
    /* second index: 0 plain, 1 x2, 2 y2, 3 xy2 variant */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* PFX_pixels_tab[IDX][0..3] = the plain, _x2, _y2 and _xy2 functions for size NUM */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    /* tpel tables: mcXY is stored at index X + 4*Y; indices 3 and 7 are not assigned here */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* 16 entry tables: mcXY is stored at index X + 4*Y */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    /* h264 chroma: [0] is the mc8, [1] the mc4 and [2] the mc2 function */
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

    /* mspel: entries 0..3 are mc00..mc30, entries 4..7 are mc02..mc32 */
    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    c->hadamard8_abs = hadamard8_abs_c;

    /* comparison functions: slot [0] gets the <name>16_c, slot [1] the <name>8x8_c function */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    SET_CMP_FUNC(dct_sad)
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;

    c->h263_h_loop_filter= h263_h_loop_filter_c;
    c->h263_v_loop_filter= h263_v_loop_filter_c;

    /* Platform-specific initializers, called after all C defaults are in place.
     * NOTE(review): presumably they replace some of the pointers set above and
     * may change idct_permutation_type - confirm in the per-architecture files. */
#ifdef HAVE_MMX
    dsputil_init_mmx(c, avctx);
#endif
#ifdef ARCH_ARMV4L
    dsputil_init_armv4l(c, avctx);
#endif
#ifdef HAVE_MLIB
    dsputil_init_mlib(c, avctx);
#endif
#ifdef ARCH_ALPHA
    dsputil_init_alpha(c, avctx);
#endif
#ifdef ARCH_POWERPC
    dsputil_init_ppc(c, avctx);
#endif
#ifdef HAVE_MMI
    dsputil_init_mmi(c, avctx);
#endif
#ifdef ARCH_SH4
    dsputil_init_sh4(c,avctx);
#endif

    /* Build the permutation table last, from whatever permutation type is now
     * selected. The generic code above only ever selects FF_NO_IDCT_PERM or
     * FF_LIBMPEG2_IDCT_PERM. */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        /* identity */
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        /* bits 3-5 stay, bits 1-2 move down to bits 0-1, bit 0 moves up to bit 2 */
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        /* swaps the low 3 bits with the high 3 bits of the index */
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    default:
        /* only logs; idct_permutation is left unfilled in this case */
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
b0368839 3165