useless
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
ff4ec49e 3 * Copyright (c) 2000, 2001 Fabrice Bellard.
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
de6d9b64 5 *
ff4ec49e
FB
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
de6d9b64 10 *
ff4ec49e 11 * This library is distributed in the hope that it will be useful,
de6d9b64 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
de6d9b64 15 *
ff4ec49e
FB
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7ff037e9 19 *
59fe111e 20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
de6d9b64 21 */
983e3246
MN
22
23/**
24 * @file dsputil.c
25 * DSP utils
26 */
27
de6d9b64
FB
28#include "avcodec.h"
29#include "dsputil.h"
1457ab52 30#include "mpegvideo.h"
b0368839 31#include "simple_idct.h"
65e4c8c9 32#include "faandct.h"
5596c60c 33
8b69867f
MN
34uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/*
 * 512-entry lookup table, used in this file through (squareTbl + 256) so it
 * can be indexed with values in -256..255.  It is all zero at load time;
 * presumably filled with squares by an init routine outside this excerpt --
 * TODO confirm.
 */
uint32_t squareTbl[512] = { 0 };
de6d9b64 36
/*
 * Zigzag scan order for an 8x8 block: entry i holds the raster index
 * (row * 8 + column) of the i-th coefficient in scan order.
 */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,   /* scan positions  0.. 7 */
    17, 24, 32, 25, 18, 11,  4,  5,   /* scan positions  8..15 */
    12, 19, 26, 33, 40, 48, 41, 34,   /* scan positions 16..23 */
    27, 20, 13,  6,  7, 14, 21, 28,   /* scan positions 24..31 */
    35, 42, 49, 56, 57, 50, 43, 36,   /* scan positions 32..39 */
    29, 22, 15, 23, 30, 37, 44, 51,   /* scan positions 40..47 */
    58, 59, 52, 45, 38, 31, 39, 46,   /* scan positions 48..55 */
    53, 60, 61, 54, 47, 55, 62, 63    /* scan positions 56..63 */
};
47
10acc479
RS
/*
 * Zigzag scan used with the 248 IDCT.
 * NOTE: unlike the specification, the two fields are interleaved here.
 */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 33, 41,   /* scan positions  8..15 */
    18, 26,  3, 11,  4, 12, 19, 27,   /* scan positions 16..23 */
    34, 42, 49, 57, 50, 58, 35, 43,   /* scan positions 24..31 */
    20, 28,  5, 13,  6, 14, 21, 29,   /* scan positions 32..39 */
    36, 44, 51, 59, 52, 60, 37, 45,   /* scan positions 40..47 */
    22, 30,  7, 15, 23, 31, 38, 46,   /* scan positions 48..55 */
    53, 61, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
60
2f349de2 61/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
8b69867f 62uint16_t __align8 inv_zigzag_direct16[64] = {0, };
2f349de2 63
/*
 * Alternate horizontal scan order for an 8x8 block: entry i holds the raster
 * index (row * 8 + column) of the i-th coefficient in scan order.
 */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,   /* scan positions  0.. 7 */
    10, 11,  4,  5,  6,  7, 15, 14,   /* scan positions  8..15 */
    13, 12, 19, 18, 24, 25, 32, 33,   /* scan positions 16..23 */
    26, 27, 20, 21, 22, 23, 28, 29,   /* scan positions 24..31 */
    30, 31, 34, 35, 40, 41, 48, 49,   /* scan positions 32..39 */
    42, 43, 36, 37, 38, 39, 44, 45,   /* scan positions 40..47 */
    46, 47, 50, 51, 56, 57, 58, 59,   /* scan positions 48..55 */
    52, 53, 54, 55, 60, 61, 62, 63,   /* scan positions 56..63 */
};
74
/*
 * Alternate vertical scan order for an 8x8 block: entry i holds the raster
 * index (row * 8 + column) of the i-th coefficient in scan order.
 */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 57, 49,   /* scan positions  8..15 */
    41, 33, 26, 18,  3, 11,  4, 12,   /* scan positions 16..23 */
    19, 27, 34, 42, 50, 58, 35, 43,   /* scan positions 24..31 */
    51, 59, 20, 28,  5, 13,  6, 14,   /* scan positions 32..39 */
    21, 29, 36, 44, 52, 60, 37, 45,   /* scan positions 40..47 */
    53, 61, 22, 30,  7, 15, 23, 31,   /* scan positions 48..55 */
    38, 46, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
85
/*
 * Fixed-point reciprocals for division by multiplication:
 *     (a * inverse[b]) >> 32 == a / b
 * holds for every 0 <= a <= 65536 and 2 <= b <= 255 when the product is
 * formed in 64 bits.  Entries 0 and 1 are outside that guarantee.
 */
const uint32_t inverse[256] = {
    /*   0 */          0, 4294967295U, 2147483648U, 1431655766, 1073741824,  858993460,  715827883,  613566757,
    /*   8 */  536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
    /*  16 */  268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
    /*  24 */  178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
    /*  32 */  134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
    /*  40 */  107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
    /*  48 */   89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
    /*  56 */   76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
    /*  64 */   67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
    /*  72 */   59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
    /*  80 */   53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
    /*  88 */   48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
    /*  96 */   44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
    /* 104 */   41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
    /* 112 */   38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
    /* 120 */   35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
    /* 128 */   33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
    /* 136 */   31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
    /* 144 */   29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
    /* 152 */   28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
    /* 160 */   26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
    /* 168 */   25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
    /* 176 */   24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
    /* 184 */   23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
    /* 192 */   22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
    /* 200 */   21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
    /* 208 */   20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
    /* 216 */   19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
    /* 224 */   19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
    /* 232 */   18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
    /* 240 */   17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
    /* 248 */   17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
121
b0368839
MN
/*
 * Coefficient input permutation for simple_idct_mmx.
 * The table is a permutation of 0x00..0x3F, listed eight entries per row.
 */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,   /* entries  0.. 7 */
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,   /* entries  8..15 */
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,   /* entries 16..23 */
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,   /* entries 24..31 */
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,   /* entries 32..39 */
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,   /* entries 40..47 */
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,   /* entries 48..55 */
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,   /* entries 56..63 */
};
133
/**
 * Sum all pixel values of a 16x16 block.
 *
 * @param pix       pointer to the top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return the sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
155
0c1a9eda 156static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
157{
158 int s, i, j;
0c1a9eda 159 uint32_t *sq = squareTbl + 256;
3aa102be
MN
160
161 s = 0;
162 for (i = 0; i < 16; i++) {
163 for (j = 0; j < 16; j += 8) {
2a006cd3 164#if 0
3aa102be
MN
165 s += sq[pix[0]];
166 s += sq[pix[1]];
167 s += sq[pix[2]];
168 s += sq[pix[3]];
169 s += sq[pix[4]];
170 s += sq[pix[5]];
171 s += sq[pix[6]];
172 s += sq[pix[7]];
2a006cd3
FL
173#else
174#if LONG_MAX > 2147483647
175 register uint64_t x=*(uint64_t*)pix;
176 s += sq[x&0xff];
177 s += sq[(x>>8)&0xff];
178 s += sq[(x>>16)&0xff];
179 s += sq[(x>>24)&0xff];
180 s += sq[(x>>32)&0xff];
181 s += sq[(x>>40)&0xff];
182 s += sq[(x>>48)&0xff];
183 s += sq[(x>>56)&0xff];
184#else
185 register uint32_t x=*(uint32_t*)pix;
186 s += sq[x&0xff];
187 s += sq[(x>>8)&0xff];
188 s += sq[(x>>16)&0xff];
189 s += sq[(x>>24)&0xff];
190 x=*(uint32_t*)(pix+4);
191 s += sq[x&0xff];
192 s += sq[(x>>8)&0xff];
193 s += sq[(x>>16)&0xff];
194 s += sq[(x>>24)&0xff];
195#endif
196#endif
3aa102be
MN
197 pix += 8;
198 }
199 pix += line_size - 16;
200 }
201 return s;
202}
203
3d2e8cce
MN
/**
 * Apply bswap_32() to each of the first w 32-bit words of src and store the
 * results at the same indices in dst.
 *
 * bswap_32 is defined outside this excerpt; by its name it presumably
 * reverses the byte order of a 32-bit word -- TODO confirm.
 *
 * @param dst destination array, at least w elements
 * @param src source array, at least w elements
 * @param w   number of 32-bit words to process; nothing is done if w <= 0
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w)
{
    int idx;

    for (idx = 0; idx < w; idx++)
        dst[idx] = bswap_32(src[idx]);
}
3aa102be 221
26efc54e
MN
222static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
223{
224 int s, i;
225 uint32_t *sq = squareTbl + 256;
226
227 s = 0;
228 for (i = 0; i < h; i++) {
229 s += sq[pix1[0] - pix2[0]];
230 s += sq[pix1[1] - pix2[1]];
231 s += sq[pix1[2] - pix2[2]];
232 s += sq[pix1[3] - pix2[3]];
233 pix1 += line_size;
234 pix2 += line_size;
235 }
236 return s;
237}
238
bb198e19 239static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
240{
241 int s, i;
0c1a9eda 242 uint32_t *sq = squareTbl + 256;
1457ab52
MN
243
244 s = 0;
bb198e19 245 for (i = 0; i < h; i++) {
1457ab52
MN
246 s += sq[pix1[0] - pix2[0]];
247 s += sq[pix1[1] - pix2[1]];
248 s += sq[pix1[2] - pix2[2]];
249 s += sq[pix1[3] - pix2[3]];
250 s += sq[pix1[4] - pix2[4]];
251 s += sq[pix1[5] - pix2[5]];
252 s += sq[pix1[6] - pix2[6]];
253 s += sq[pix1[7] - pix2[7]];
254 pix1 += line_size;
255 pix2 += line_size;
256 }
257 return s;
258}
259
bb198e19 260static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 261{
6b026927
FH
262 int s, i;
263 uint32_t *sq = squareTbl + 256;
9c76bd48
BF
264
265 s = 0;
bb198e19 266 for (i = 0; i < h; i++) {
6b026927
FH
267 s += sq[pix1[ 0] - pix2[ 0]];
268 s += sq[pix1[ 1] - pix2[ 1]];
269 s += sq[pix1[ 2] - pix2[ 2]];
270 s += sq[pix1[ 3] - pix2[ 3]];
271 s += sq[pix1[ 4] - pix2[ 4]];
272 s += sq[pix1[ 5] - pix2[ 5]];
273 s += sq[pix1[ 6] - pix2[ 6]];
274 s += sq[pix1[ 7] - pix2[ 7]];
275 s += sq[pix1[ 8] - pix2[ 8]];
276 s += sq[pix1[ 9] - pix2[ 9]];
277 s += sq[pix1[10] - pix2[10]];
278 s += sq[pix1[11] - pix2[11]];
279 s += sq[pix1[12] - pix2[12]];
280 s += sq[pix1[13] - pix2[13]];
281 s += sq[pix1[14] - pix2[14]];
282 s += sq[pix1[15] - pix2[15]];
2a006cd3 283
6b026927
FH
284 pix1 += line_size;
285 pix2 += line_size;
9c76bd48
BF
286 }
287 return s;
288}
289
26efc54e
MN
290
/**
 * Wavelet-domain distortion between two pixel blocks.
 *
 * The per-pixel difference pix1 - pix2, scaled by 16, is written into a
 * scratch buffer with a row stride of 16, transformed in place by
 * ff_spatial_dwt(), and the absolute values of the resulting entries are
 * summed.
 *
 * The difference is scaled with a multiplication rather than "<< 4":
 * the difference can be negative, and left-shifting a negative int is
 * undefined behaviour in C.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param w         block width; columns are processed four at a time and
 *                  must fit the 16-column scratch buffer
 * @param h         block height; must fit the 16-row scratch buffer
 * @param type      forwarded unchanged to ff_spatial_dwt()
 * @return the sum of absolute transformed values, divided by 4
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;  /* decomposition levels handed to ff_spatial_dwt() */
    int tmp[16*16];
#if 0
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            //8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            //16x16 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {//FIXME 5/3
            //8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            //16x16 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };
#endif

    /* tmp[] holds exactly 16 rows of 16 entries */
    assert(w <= 16 && h <= 16);

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])*16;
            tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])*16;
            tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])*16;
            tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])*16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }
    ff_spatial_dwt(tmp, w, h, 16, type, dec_count);

    s=0;
#if 0
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int sx= (ori&1) ? 1<<level: 0;
            int stride= 16<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;
            int size= 1<<level;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += ABS(v);
                }
            }
        }
    }
#endif
    /* unweighted sum of absolute values over the whole transformed block */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            s+= ABS(tmp[16*i+j+0]);
            s+= ABS(tmp[16*i+j+1]);
            s+= ABS(tmp[16*i+j+2]);
            s+= ABS(tmp[16*i+j+3]);
        }
    }
    assert(s>=0);

    return s>>2;
}
370
/**
 * 8-pixel-wide wrapper around w_c() with type = 1.
 * Going by the name, type 1 presumably selects the 5/3 wavelet -- TODO confirm.
 */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 8;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
374
/**
 * 8-pixel-wide wrapper around w_c() with type = 0.
 * Going by the name, type 0 presumably selects the 9/7 wavelet -- TODO confirm.
 */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 8;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
378
/**
 * 16-pixel-wide wrapper around w_c() with type = 1.
 * Going by the name, type 1 presumably selects the 5/3 wavelet -- TODO confirm.
 */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 16;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
382
/**
 * 16-pixel-wide wrapper around w_c() with type = 0.
 * Going by the name, type 0 presumably selects the 9/7 wavelet -- TODO confirm.
 */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 16;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
386
0c1a9eda 387static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 388{
de6d9b64
FB
389 int i;
390
391 /* read the pixels */
de6d9b64 392 for(i=0;i<8;i++) {
c13e1abd
FH
393 block[0] = pixels[0];
394 block[1] = pixels[1];
395 block[2] = pixels[2];
396 block[3] = pixels[3];
397 block[4] = pixels[4];
398 block[5] = pixels[5];
399 block[6] = pixels[6];
400 block[7] = pixels[7];
401 pixels += line_size;
402 block += 8;
de6d9b64
FB
403 }
404}
405
0c1a9eda
ZK
406static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
407 const uint8_t *s2, int stride){
9dbcbd92
MN
408 int i;
409
410 /* read the pixels */
9dbcbd92 411 for(i=0;i<8;i++) {
c13e1abd
FH
412 block[0] = s1[0] - s2[0];
413 block[1] = s1[1] - s2[1];
414 block[2] = s1[2] - s2[2];
415 block[3] = s1[3] - s2[3];
416 block[4] = s1[4] - s2[4];
417 block[5] = s1[5] - s2[5];
418 block[6] = s1[6] - s2[6];
419 block[7] = s1[7] - s2[7];
9dbcbd92
MN
420 s1 += stride;
421 s2 += stride;
c13e1abd 422 block += 8;
9dbcbd92
MN
423 }
424}
425
426
0c1a9eda 427static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
eb4b3dd3 428 int line_size)
de6d9b64 429{
de6d9b64 430 int i;
0c1a9eda 431 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
432
433 /* read the pixels */
de6d9b64 434 for(i=0;i<8;i++) {
c13e1abd
FH
435 pixels[0] = cm[block[0]];
436 pixels[1] = cm[block[1]];
437 pixels[2] = cm[block[2]];
438 pixels[3] = cm[block[3]];
439 pixels[4] = cm[block[4]];
440 pixels[5] = cm[block[5]];
441 pixels[6] = cm[block[6]];
442 pixels[7] = cm[block[7]];
443
444 pixels += line_size;
445 block += 8;
de6d9b64
FB
446 }
447}
448
f9ed9d85
MM
449static void put_signed_pixels_clamped_c(const DCTELEM *block,
450 uint8_t *restrict pixels,
451 int line_size)
452{
453 int i, j;
454
455 for (i = 0; i < 8; i++) {
456 for (j = 0; j < 8; j++) {
457 if (*block < -128)
458 *pixels = 0;
459 else if (*block > 127)
460 *pixels = 255;
461 else
462 *pixels = (uint8_t)(*block + 128);
463 block++;
464 pixels++;
465 }
466 pixels += (line_size - 8);
467 }
468}
469
0c1a9eda 470static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 471 int line_size)
de6d9b64 472{
de6d9b64 473 int i;
0c1a9eda 474 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
475
476 /* read the pixels */
de6d9b64 477 for(i=0;i<8;i++) {
c13e1abd
FH
478 pixels[0] = cm[pixels[0] + block[0]];
479 pixels[1] = cm[pixels[1] + block[1]];
480 pixels[2] = cm[pixels[2] + block[2]];
481 pixels[3] = cm[pixels[3] + block[3]];
482 pixels[4] = cm[pixels[4] + block[4]];
483 pixels[5] = cm[pixels[5] + block[5]];
484 pixels[6] = cm[pixels[6] + block[6]];
485 pixels[7] = cm[pixels[7] + block[7]];
486 pixels += line_size;
487 block += 8;
de6d9b64
FB
488 }
489}
59fe111e
MN
490#if 0
491
492#define PIXOP2(OPNAME, OP) \
b3184779 493static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
494{\
495 int i;\
496 for(i=0; i<h; i++){\
497 OP(*((uint64_t*)block), LD64(pixels));\
498 pixels+=line_size;\
499 block +=line_size;\
500 }\
501}\
502\
45553457 503static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
504{\
505 int i;\
506 for(i=0; i<h; i++){\
507 const uint64_t a= LD64(pixels );\
508 const uint64_t b= LD64(pixels+1);\
509 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
510 pixels+=line_size;\
511 block +=line_size;\
512 }\
513}\
514\
45553457 515static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
516{\
517 int i;\
518 for(i=0; i<h; i++){\
519 const uint64_t a= LD64(pixels );\
520 const uint64_t b= LD64(pixels+1);\
521 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
522 pixels+=line_size;\
523 block +=line_size;\
524 }\
525}\
526\
45553457 527static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
528{\
529 int i;\
530 for(i=0; i<h; i++){\
531 const uint64_t a= LD64(pixels );\
532 const uint64_t b= LD64(pixels+line_size);\
533 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
534 pixels+=line_size;\
535 block +=line_size;\
536 }\
537}\
538\
45553457 539static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
540{\
541 int i;\
542 for(i=0; i<h; i++){\
543 const uint64_t a= LD64(pixels );\
544 const uint64_t b= LD64(pixels+line_size);\
545 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
546 pixels+=line_size;\
547 block +=line_size;\
548 }\
549}\
550\
45553457 551static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
552{\
553 int i;\
554 const uint64_t a= LD64(pixels );\
555 const uint64_t b= LD64(pixels+1);\
556 uint64_t l0= (a&0x0303030303030303ULL)\
557 + (b&0x0303030303030303ULL)\
558 + 0x0202020202020202ULL;\
559 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
560 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
561 uint64_t l1,h1;\
562\
563 pixels+=line_size;\
564 for(i=0; i<h; i+=2){\
565 uint64_t a= LD64(pixels );\
566 uint64_t b= LD64(pixels+1);\
567 l1= (a&0x0303030303030303ULL)\
568 + (b&0x0303030303030303ULL);\
569 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
570 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
571 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
572 pixels+=line_size;\
573 block +=line_size;\
574 a= LD64(pixels );\
575 b= LD64(pixels+1);\
576 l0= (a&0x0303030303030303ULL)\
577 + (b&0x0303030303030303ULL)\
578 + 0x0202020202020202ULL;\
579 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
580 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
581 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
582 pixels+=line_size;\
583 block +=line_size;\
584 }\
585}\
586\
45553457 587static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
588{\
589 int i;\
590 const uint64_t a= LD64(pixels );\
591 const uint64_t b= LD64(pixels+1);\
592 uint64_t l0= (a&0x0303030303030303ULL)\
593 + (b&0x0303030303030303ULL)\
594 + 0x0101010101010101ULL;\
595 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
596 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
597 uint64_t l1,h1;\
598\
599 pixels+=line_size;\
600 for(i=0; i<h; i+=2){\
601 uint64_t a= LD64(pixels );\
602 uint64_t b= LD64(pixels+1);\
603 l1= (a&0x0303030303030303ULL)\
604 + (b&0x0303030303030303ULL);\
605 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
606 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
607 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
608 pixels+=line_size;\
609 block +=line_size;\
610 a= LD64(pixels );\
611 b= LD64(pixels+1);\
612 l0= (a&0x0303030303030303ULL)\
613 + (b&0x0303030303030303ULL)\
614 + 0x0101010101010101ULL;\
615 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
616 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
617 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
618 pixels+=line_size;\
619 block +=line_size;\
620 }\
621}\
622\
45553457
ZK
623CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
624CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
625CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
626CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
627CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
628CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
629CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
630
631#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
632#else // 64 bit variant
633
634#define PIXOP2(OPNAME, OP) \
669ac79c
MN
635static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
636 int i;\
637 for(i=0; i<h; i++){\
638 OP(*((uint16_t*)(block )), LD16(pixels ));\
639 pixels+=line_size;\
640 block +=line_size;\
641 }\
642}\
0da71265
MN
643static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
644 int i;\
645 for(i=0; i<h; i++){\
646 OP(*((uint32_t*)(block )), LD32(pixels ));\
647 pixels+=line_size;\
648 block +=line_size;\
649 }\
650}\
45553457 651static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
59fe111e
MN
652 int i;\
653 for(i=0; i<h; i++){\
654 OP(*((uint32_t*)(block )), LD32(pixels ));\
655 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
656 pixels+=line_size;\
657 block +=line_size;\
658 }\
659}\
45553457
ZK
660static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
661 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
b3184779 662}\
59fe111e 663\
b3184779
MN
664static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
665 int src_stride1, int src_stride2, int h){\
59fe111e
MN
666 int i;\
667 for(i=0; i<h; i++){\
b3184779
MN
668 uint32_t a,b;\
669 a= LD32(&src1[i*src_stride1 ]);\
670 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 671 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
b3184779
MN
672 a= LD32(&src1[i*src_stride1+4]);\
673 b= LD32(&src2[i*src_stride2+4]);\
d8085ea7 674 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
59fe111e
MN
675 }\
676}\
677\
b3184779
MN
678static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
679 int src_stride1, int src_stride2, int h){\
59fe111e
MN
680 int i;\
681 for(i=0; i<h; i++){\
b3184779
MN
682 uint32_t a,b;\
683 a= LD32(&src1[i*src_stride1 ]);\
684 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 685 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
b3184779
MN
686 a= LD32(&src1[i*src_stride1+4]);\
687 b= LD32(&src2[i*src_stride2+4]);\
d8085ea7 688 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
59fe111e
MN
689 }\
690}\
691\
0da71265
MN
692static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
693 int src_stride1, int src_stride2, int h){\
694 int i;\
695 for(i=0; i<h; i++){\
696 uint32_t a,b;\
697 a= LD32(&src1[i*src_stride1 ]);\
698 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 699 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
0da71265
MN
700 }\
701}\
702\
669ac79c
MN
703static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
704 int src_stride1, int src_stride2, int h){\
705 int i;\
706 for(i=0; i<h; i++){\
707 uint32_t a,b;\
708 a= LD16(&src1[i*src_stride1 ]);\
709 b= LD16(&src2[i*src_stride2 ]);\
710 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
711 }\
712}\
713\
b3184779
MN
714static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
715 int src_stride1, int src_stride2, int h){\
716 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
717 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
718}\
719\
720static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
721 int src_stride1, int src_stride2, int h){\
722 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
723 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
724}\
725\
45553457 726static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
727 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
728}\
729\
45553457 730static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
731 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
732}\
733\
45553457 734static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
735 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
736}\
737\
45553457 738static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
739 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
740}\
741\
742static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
743 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
744 int i;\
745 for(i=0; i<h; i++){\
b3184779
MN
746 uint32_t a, b, c, d, l0, l1, h0, h1;\
747 a= LD32(&src1[i*src_stride1]);\
748 b= LD32(&src2[i*src_stride2]);\
749 c= LD32(&src3[i*src_stride3]);\
750 d= LD32(&src4[i*src_stride4]);\
751 l0= (a&0x03030303UL)\
752 + (b&0x03030303UL)\
753 + 0x02020202UL;\
754 h0= ((a&0xFCFCFCFCUL)>>2)\
755 + ((b&0xFCFCFCFCUL)>>2);\
756 l1= (c&0x03030303UL)\
757 + (d&0x03030303UL);\
758 h1= ((c&0xFCFCFCFCUL)>>2)\
759 + ((d&0xFCFCFCFCUL)>>2);\
760 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
761 a= LD32(&src1[i*src_stride1+4]);\
762 b= LD32(&src2[i*src_stride2+4]);\
763 c= LD32(&src3[i*src_stride3+4]);\
764 d= LD32(&src4[i*src_stride4+4]);\
765 l0= (a&0x03030303UL)\
766 + (b&0x03030303UL)\
767 + 0x02020202UL;\
768 h0= ((a&0xFCFCFCFCUL)>>2)\
769 + ((b&0xFCFCFCFCUL)>>2);\
770 l1= (c&0x03030303UL)\
771 + (d&0x03030303UL);\
772 h1= ((c&0xFCFCFCFCUL)>>2)\
773 + ((d&0xFCFCFCFCUL)>>2);\
774 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
775 }\
776}\
669ac79c
MN
777\
778static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
779 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
780}\
781\
782static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
783 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
784}\
785\
786static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
787 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
788}\
789\
790static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
791 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
792}\
793\
b3184779
MN
794static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
795 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
796 int i;\
797 for(i=0; i<h; i++){\
b3184779
MN
798 uint32_t a, b, c, d, l0, l1, h0, h1;\
799 a= LD32(&src1[i*src_stride1]);\
800 b= LD32(&src2[i*src_stride2]);\
801 c= LD32(&src3[i*src_stride3]);\
802 d= LD32(&src4[i*src_stride4]);\
803 l0= (a&0x03030303UL)\
804 + (b&0x03030303UL)\
805 + 0x01010101UL;\
806 h0= ((a&0xFCFCFCFCUL)>>2)\
807 + ((b&0xFCFCFCFCUL)>>2);\
808 l1= (c&0x03030303UL)\
809 + (d&0x03030303UL);\
810 h1= ((c&0xFCFCFCFCUL)>>2)\
811 + ((d&0xFCFCFCFCUL)>>2);\
812 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
813 a= LD32(&src1[i*src_stride1+4]);\
814 b= LD32(&src2[i*src_stride2+4]);\
815 c= LD32(&src3[i*src_stride3+4]);\
816 d= LD32(&src4[i*src_stride4+4]);\
817 l0= (a&0x03030303UL)\
818 + (b&0x03030303UL)\
819 + 0x01010101UL;\
820 h0= ((a&0xFCFCFCFCUL)>>2)\
821 + ((b&0xFCFCFCFCUL)>>2);\
822 l1= (c&0x03030303UL)\
823 + (d&0x03030303UL);\
824 h1= ((c&0xFCFCFCFCUL)>>2)\
825 + ((d&0xFCFCFCFCUL)>>2);\
826 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
827 }\
828}\
b3184779
MN
829static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
830 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
831 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
832 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
833}\
834static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
835 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
836 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
837 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
838}\
59fe111e 839\
669ac79c
MN
840static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
841{\
842 int i, a0, b0, a1, b1;\
843 a0= pixels[0];\
844 b0= pixels[1] + 2;\
845 a0 += b0;\
846 b0 += pixels[2];\
847\
848 pixels+=line_size;\
849 for(i=0; i<h; i+=2){\
850 a1= pixels[0];\
851 b1= pixels[1];\
852 a1 += b1;\
853 b1 += pixels[2];\
854\
855 block[0]= (a1+a0)>>2; /* FIXME non put */\
856 block[1]= (b1+b0)>>2;\
857\
858 pixels+=line_size;\
859 block +=line_size;\
860\
861 a0= pixels[0];\
862 b0= pixels[1] + 2;\
863 a0 += b0;\
864 b0 += pixels[2];\
865\
866 block[0]= (a1+a0)>>2;\
867 block[1]= (b1+b0)>>2;\
868 pixels+=line_size;\
869 block +=line_size;\
870 }\
871}\
872\
873static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
874{\
875 int i;\
876 const uint32_t a= LD32(pixels );\
877 const uint32_t b= LD32(pixels+1);\
878 uint32_t l0= (a&0x03030303UL)\
879 + (b&0x03030303UL)\
880 + 0x02020202UL;\
881 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
882 + ((b&0xFCFCFCFCUL)>>2);\
883 uint32_t l1,h1;\
884\
885 pixels+=line_size;\
886 for(i=0; i<h; i+=2){\
887 uint32_t a= LD32(pixels );\
888 uint32_t b= LD32(pixels+1);\
889 l1= (a&0x03030303UL)\
890 + (b&0x03030303UL);\
891 h1= ((a&0xFCFCFCFCUL)>>2)\
892 + ((b&0xFCFCFCFCUL)>>2);\
893 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
894 pixels+=line_size;\
895 block +=line_size;\
896 a= LD32(pixels );\
897 b= LD32(pixels+1);\
898 l0= (a&0x03030303UL)\
899 + (b&0x03030303UL)\
900 + 0x02020202UL;\
901 h0= ((a&0xFCFCFCFCUL)>>2)\
902 + ((b&0xFCFCFCFCUL)>>2);\
903 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
904 pixels+=line_size;\
905 block +=line_size;\
906 }\
907}\
908\
45553457 909static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
910{\
911 int j;\
912 for(j=0; j<2; j++){\
913 int i;\
914 const uint32_t a= LD32(pixels );\
915 const uint32_t b= LD32(pixels+1);\
916 uint32_t l0= (a&0x03030303UL)\
917 + (b&0x03030303UL)\
918 + 0x02020202UL;\
919 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
920 + ((b&0xFCFCFCFCUL)>>2);\
921 uint32_t l1,h1;\
922\
923 pixels+=line_size;\
924 for(i=0; i<h; i+=2){\
925 uint32_t a= LD32(pixels );\
926 uint32_t b= LD32(pixels+1);\
927 l1= (a&0x03030303UL)\
928 + (b&0x03030303UL);\
929 h1= ((a&0xFCFCFCFCUL)>>2)\
930 + ((b&0xFCFCFCFCUL)>>2);\
931 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
932 pixels+=line_size;\
933 block +=line_size;\
934 a= LD32(pixels );\
935 b= LD32(pixels+1);\
936 l0= (a&0x03030303UL)\
937 + (b&0x03030303UL)\
938 + 0x02020202UL;\
939 h0= ((a&0xFCFCFCFCUL)>>2)\
940 + ((b&0xFCFCFCFCUL)>>2);\
941 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
942 pixels+=line_size;\
943 block +=line_size;\
944 }\
945 pixels+=4-line_size*(h+1);\
946 block +=4-line_size*h;\
947 }\
948}\
949\
45553457 950static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
951{\
952 int j;\
953 for(j=0; j<2; j++){\
954 int i;\
955 const uint32_t a= LD32(pixels );\
956 const uint32_t b= LD32(pixels+1);\
957 uint32_t l0= (a&0x03030303UL)\
958 + (b&0x03030303UL)\
959 + 0x01010101UL;\
960 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
961 + ((b&0xFCFCFCFCUL)>>2);\
962 uint32_t l1,h1;\
963\
964 pixels+=line_size;\
965 for(i=0; i<h; i+=2){\
966 uint32_t a= LD32(pixels );\
967 uint32_t b= LD32(pixels+1);\
968 l1= (a&0x03030303UL)\
969 + (b&0x03030303UL);\
970 h1= ((a&0xFCFCFCFCUL)>>2)\
971 + ((b&0xFCFCFCFCUL)>>2);\
972 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
973 pixels+=line_size;\
974 block +=line_size;\
975 a= LD32(pixels );\
976 b= LD32(pixels+1);\
977 l0= (a&0x03030303UL)\
978 + (b&0x03030303UL)\
979 + 0x01010101UL;\
980 h0= ((a&0xFCFCFCFCUL)>>2)\
981 + ((b&0xFCFCFCFCUL)>>2);\
982 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
983 pixels+=line_size;\
984 block +=line_size;\
985 }\
986 pixels+=4-line_size*(h+1);\
987 block +=4-line_size*h;\
988 }\
989}\
990\
45553457
ZK
991CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
992CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
993CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
994CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
995CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
996CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
997CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
998CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
b3184779 999
d8085ea7 1000#define op_avg(a, b) a = rnd_avg32(a, b)
59fe111e 1001#endif
59fe111e
MN
1002#define op_put(a, b) a = b
1003
1004PIXOP2(avg, op_avg)
1005PIXOP2(put, op_put)
1006#undef op_avg
1007#undef op_put
1008
de6d9b64
FB
/* Rounded-up average of two values: (a + b + 1) / 2.
 * Every argument is parenthesized so that compound expressions
 * (e.g. containing |, &, ?:) expand with the intended precedence. */
#define avg2(a,b) (((a)+(b)+1)>>1)
/* Rounded average of four values: (a + b + c + d + 2) / 4. */
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
1011
c0a0170c
MN
/* Single-stride front end: forwards to put_no_rnd_pixels16_l2(), passing the
 * same stride for all three of its stride arguments. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, a, b,
                           stride, stride, stride,
                           h);
}
1015
/* Single-stride front end: forwards to put_no_rnd_pixels8_l2(), passing the
 * same stride for all three of its stride arguments. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, a, b,
                          stride, stride, stride,
                          h);
}
073b013d 1019
/**
 * Bilinear interpolation of an 8-pixel-wide block with 1/16 sample weights.
 *
 * Each output pixel is a weighted sum of the source pixel, its right
 * neighbour, and the two pixels one stride below them; the four weights
 * sum to 256, hence the final >>8.
 *
 * @param dst     destination, advanced by stride per row
 * @param src     source; columns 0..8 of rows 0..h are read
 * @param stride  row pitch shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand samples (out of 16)
 * @param y16     vertical weight of the lower samples (out of 16)
 * @param rounder value added before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = (     x16) * (16 - y16);
    const int w_bl = (16 - x16) * (     y16);
    const int w_br = (     x16) * (     y16);
    int row;

    for (row = 0; row < h; row++) {
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1]
                      + w_bl * src[stride + col] + w_br * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1042
/**
 * Generic warped block fetch: produces h rows of 8 pixels.
 *
 * The source position for each output pixel starts at (ox, oy) and advances
 * by (dxx, dyx) per column and (dxy, dyy) per row. The top 16 bits of each
 * coordinate (after >>16) hold the position with `shift` fractional bits.
 * Positions inside [0, width-1) x [0, height-1) are bilinearly interpolated
 * in both directions; if one coordinate is outside, it is clamped and only
 * the other direction is interpolated; if both are outside, the clamped
 * pixel is copied.
 *
 * @param r  rounding constant added before the final >> (2*shift)
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s = 1 << shift;
    int row;

    /* From here on width/height are the largest valid coordinates. */
    width--;
    height--;

    for (row = 0; row < h; row++, ox += dxy, oy += dyy) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++, vx += dxx, vy += dyx) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & (s - 1);
            const int frac_y = src_y & (s - 1);
            int index;

            src_x >>= shift;
            src_y >>= shift;

            /* The unsigned compare rejects negative coordinates as well. */
            if ((unsigned)src_x < width && (unsigned)src_y < height) {
                index = src_x + src_y * stride;
                out[col] = ( ( src[index           ] * (s - frac_x)
                             + src[index         +1] *      frac_x  ) * (s - frac_y)
                           + ( src[index+stride    ] * (s - frac_x)
                             + src[index+stride  +1] *      frac_x  ) *      frac_y
                           + r) >> (shift * 2);
            } else if ((unsigned)src_x < width) {
                /* Row out of range: clamp it, interpolate horizontally only. */
                index = src_x + clip(src_y, 0, height) * stride;
                out[col] = ( ( src[index   ] * (s - frac_x)
                             + src[index +1] *      frac_x  ) * s
                           + r) >> (shift * 2);
            } else if ((unsigned)src_y < height) {
                /* Column out of range: clamp it, interpolate vertically only. */
                index = clip(src_x, 0, width) + src_y * stride;
                out[col] = ( ( src[index        ] * (s - frac_y)
                             + src[index+stride ] *      frac_y  ) * s
                           + r) >> (shift * 2);
            } else {
                index = clip(src_x, 0, width) + clip(src_y, 0, height) * stride;
                out[col] = src[index];
            }
        }
    }
}
669ac79c
MN
1100
/* Full-sample copy: dispatches to the fixed-width put_pixelsN_c routine that
 * matches width (2, 4, 8 or 16); any other width does nothing. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
1109
/* Horizontal blend with weights 2:1 (current : right neighbour); the sum is
 * scaled by 683/2^11, i.e. approximately divided by 3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1120
/* Horizontal blend with weights 1:2 (current : right neighbour); the sum is
 * scaled by 683/2^11, i.e. approximately divided by 3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1131
/* Vertical blend with weights 2:1 (current : pixel one stride below); the sum
 * is scaled by 683/2^11, i.e. approximately divided by 3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1142
/* 2x2 blend with weights 4,3 / 3,2 (top row / bottom row); the sum of the
 * weights is 12 and 2731/2^15 approximates the division by 12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1153
/* 2x2 blend with weights 3,2 / 4,3 (top row / bottom row); the sum of the
 * weights is 12 and 2731/2^15 approximates the division by 12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1164
/* Vertical blend with weights 1:2 (current : pixel one stride below); the sum
 * is scaled by 683/2^11, i.e. approximately divided by 3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1175
/* 2x2 blend with weights 3,4 / 2,3 (top row / bottom row); the sum of the
 * weights is 12 and 2731/2^15 approximates the division by 12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1186
/* 2x2 blend with weights 2,3 / 3,4 (top row / bottom row); the sum of the
 * weights is 12 and 2731/2^15 approximates the division by 12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
da3b9756
MM
1197
/* Full-sample averaging: dispatches to the fixed-width avg_pixelsN_c routine
 * that matches width (2, 4, 8 or 16); any other width does nothing. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1206
/* Computes the 2:1 horizontal blend (current : right neighbour, scaled by
 * 683/2^11) and stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1217
/* Computes the 1:2 horizontal blend (current : right neighbour, scaled by
 * 683/2^11) and stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1228
/* Computes the 2:1 vertical blend (current : pixel one stride below, scaled
 * by 683/2^11) and stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1239
/* Computes the 2x2 blend with weights 4,3 / 3,2 (scaled by 2731/2^15) and
 * stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731*(4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1250
/* Computes the 2x2 blend with weights 3,2 / 4,3 (scaled by 2731/2^15) and
 * stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731*(3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1261
/* Computes the 1:2 vertical blend (current : pixel one stride below, scaled
 * by 683/2^11) and stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1272
/* Computes the 2x2 blend with weights 3,4 / 2,3 (scaled by 2731/2^15) and
 * stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731*(3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1283
/* Computes the 2x2 blend with weights 2,3 / 3,4 (scaled by 2731/2^15) and
 * stores its rounded average with the existing dst pixel. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731*(2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
669ac79c
MN
#if 0
/* Disabled. TPEL_WIDTH(width) would emit one fixed-width wrapper per tpel
 * position, each forwarding to the generic put_tpel_pixels_mcXY_c() helper
 * with the width baked in. The wrapper bodies are plain calls (a leading
 * "void" would turn them into malformed declarations). */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1315
0da71265
MN
/*
 * H264_CHROMA_MC(OPNAME, OP) emits three bilinear interpolators,
 * OPNAME h264_chroma_mc{2,4,8}_c, for blocks 2, 4 and 8 pixels wide.
 *
 * x and y are the fractional offsets and must lie in 0..7 (asserted). The
 * four weights A..D sum to 64; OP receives the destination pixel and the
 * unnormalized weighted sum and is responsible for rounding and scaling.
 * Each row reads one extra column to the right and one extra row below.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(row=0; row<h; row++, dst+=stride, src+=stride){\
        for(col=0; col<2; col++){\
            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(row=0; row<h; row++, dst+=stride, src+=stride){\
        for(col=0; col<4; col++){\
            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(row=0; row<h; row++, dst+=stride, src+=stride){\
        for(col=0; col<8; col++){\
            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
        }\
    }\
}

/* op_put: round the weighted sum and scale it down by 64.
 * op_avg: same, then take the rounded average with the existing pixel. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1386
/* Copies h rows of 4 bytes from src to dst as one 32-bit load/store per row;
 * the two buffers may use different strides. */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        ST32(dst, LD32(src));
    }
}
1397
/* Copies h rows of 8 bytes from src to dst as two 32-bit words per row;
 * the two buffers may use different strides. */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;
        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
    }
}
1409
/* Copies h rows of 16 bytes from src to dst as four 32-bit words per row;
 * the two buffers may use different strides. */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;
        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
    }
}
073b013d 1423
/* Copies h rows of 17 bytes from src to dst: four 32-bit words followed by
 * one trailing byte per row. The two buffers may use different strides. */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;
        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[16] = src[16];
    }
}
1438
/* Copies h rows of 9 bytes from src to dst: two 32-bit words followed by
 * one trailing byte per row. The two buffers may use different strides. */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;
    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;
        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[8] = src[8];
    }
}
1451
826f429a 1452
b3184779 1453#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda
ZK
1454static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1455 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1456 int i;\
1457 for(i=0; i<h; i++)\
1458 {\
1459 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1460 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1461 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1462 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1463 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1464 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1465 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1466 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1467 dst+=dstStride;\
1468 src+=srcStride;\
1469 }\
44eb4951
MN
1470}\
1471\
0c1a9eda 1472static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 1473 const int w=8;\
0c1a9eda 1474 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1475 int i;\
1476 for(i=0; i<w; i++)\
1477 {\
1478 const int src0= src[0*srcStride];\
1479 const int src1= src[1*srcStride];\
1480 const int src2= src[2*srcStride];\
1481 const int src3= src[3*srcStride];\
1482 const int src4= src[4*srcStride];\
1483 const int src5= src[5*srcStride];\
1484 const int src6= src[6*srcStride];\
1485 const int src7= src[7*srcStride];\
1486 const int src8= src[8*srcStride];\
1487 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1488 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1489 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1490 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1491 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1492 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1493 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1494 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1495 dst++;\
1496 src++;\
1497 }\
1498}\
1499\
0c1a9eda
ZK
1500static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1501 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1502 int i;\
826f429a 1503 \
b3184779
MN
1504 for(i=0; i<h; i++)\
1505 {\
1506 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1507 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1508 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1509 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1510 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1511 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1512 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1513 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1514 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1515 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1516 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1517 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1518 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1519 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1520 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1521 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1522 dst+=dstStride;\
1523 src+=srcStride;\
1524 }\
1525}\
1526\
0c1a9eda
ZK
1527static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1528 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1529 int i;\
826f429a 1530 const int w=16;\
b3184779
MN
1531 for(i=0; i<w; i++)\
1532 {\
1533 const int src0= src[0*srcStride];\
1534 const int src1= src[1*srcStride];\
1535 const int src2= src[2*srcStride];\
1536 const int src3= src[3*srcStride];\
1537 const int src4= src[4*srcStride];\
1538 const int src5= src[5*srcStride];\
1539 const int src6= src[6*srcStride];\
1540 const int src7= src[7*srcStride];\
1541 const int src8= src[8*srcStride];\
1542 const int src9= src[9*srcStride];\
1543 const int src10= src[10*srcStride];\
1544 const int src11= src[11*srcStride];\
1545 const int src12= src[12*srcStride];\
1546 const int src13= src[13*srcStride];\
1547 const int src14= src[14*srcStride];\
1548 const int src15= src[15*srcStride];\
1549 const int src16= src[16*srcStride];\
1550 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1551 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1552 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1553 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1554 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1555 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1556 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1557 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1558 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1559 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1560 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1561 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1562 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1563 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1564 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1565 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1566 dst++;\
1567 src++;\
1568 }\
1569}\
1570\
0c1a9eda 1571static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1572 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
1573}\
1574\
0c1a9eda
ZK
1575static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1576 uint8_t half[64];\
b3184779
MN
1577 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1578 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
1579}\
1580\
0c1a9eda 1581static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1582 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
1583}\
1584\
0c1a9eda
ZK
1585static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1586 uint8_t half[64];\
b3184779
MN
1587 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1588 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
1589}\
1590\
0c1a9eda
ZK
1591static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1592 uint8_t full[16*9];\
1593 uint8_t half[64];\
b3184779 1594 copy_block9(full, src, 16, stride, 9);\
db794953 1595 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1596 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
1597}\
1598\
0c1a9eda
ZK
1599static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1600 uint8_t full[16*9];\
b3184779 1601 copy_block9(full, src, 16, stride, 9);\
db794953 1602 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
1603}\
1604\
0c1a9eda
ZK
1605static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1606 uint8_t full[16*9];\
1607 uint8_t half[64];\
b3184779 1608 copy_block9(full, src, 16, stride, 9);\
db794953 1609 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1610 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 1611}\
0c1a9eda
ZK
1612void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1613 uint8_t full[16*9];\
1614 uint8_t halfH[72];\
1615 uint8_t halfV[64];\
1616 uint8_t halfHV[64];\
b3184779
MN
1617 copy_block9(full, src, 16, stride, 9);\
1618 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1619 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1620 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1621 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1622}\
0c1a9eda
ZK
1623static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1624 uint8_t full[16*9];\
1625 uint8_t halfH[72];\
1626 uint8_t halfHV[64];\
db794953
MN
1627 copy_block9(full, src, 16, stride, 9);\
1628 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1629 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1630 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1631 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1632}\
0c1a9eda
ZK
1633void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1634 uint8_t full[16*9];\
1635 uint8_t halfH[72];\
1636 uint8_t halfV[64];\
1637 uint8_t halfHV[64];\
b3184779
MN
1638 copy_block9(full, src, 16, stride, 9);\
1639 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1640 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1641 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1642 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1643}\
0c1a9eda
ZK
1644static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1645 uint8_t full[16*9];\
1646 uint8_t halfH[72];\
1647 uint8_t halfHV[64];\
db794953
MN
1648 copy_block9(full, src, 16, stride, 9);\
1649 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1650 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1651 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1652 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1653}\
0c1a9eda
ZK
1654void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1655 uint8_t full[16*9];\
1656 uint8_t halfH[72];\
1657 uint8_t halfV[64];\
1658 uint8_t halfHV[64];\
b3184779
MN
1659 copy_block9(full, src, 16, stride, 9);\
1660 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1661 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1662 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1663 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1664}\
0c1a9eda
ZK
1665static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1666 uint8_t full[16*9];\
1667 uint8_t halfH[72];\
1668 uint8_t halfHV[64];\
db794953
MN
1669 copy_block9(full, src, 16, stride, 9);\
1670 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1671 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1672 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1673 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1674}\
0c1a9eda
ZK
1675void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1676 uint8_t full[16*9];\
1677 uint8_t halfH[72];\
1678 uint8_t halfV[64];\
1679 uint8_t halfHV[64];\
b3184779
MN
1680 copy_block9(full, src, 16, stride, 9);\
1681 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1682 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1683 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1684 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1685}\
0c1a9eda
ZK
1686static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1687 uint8_t full[16*9];\
1688 uint8_t halfH[72];\
1689 uint8_t halfHV[64];\
db794953
MN
1690 copy_block9(full, src, 16, stride, 9);\
1691 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1692 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1693 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1694 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1695}\
0c1a9eda
ZK
1696static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1697 uint8_t halfH[72];\
1698 uint8_t halfHV[64];\
b3184779 1699 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1700 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1701 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1702}\
0c1a9eda
ZK
1703static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1704 uint8_t halfH[72];\
1705 uint8_t halfHV[64];\
b3184779 1706 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1707 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1708 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1709}\
0c1a9eda
ZK
1710void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1711 uint8_t full[16*9];\
1712 uint8_t halfH[72];\
1713 uint8_t halfV[64];\
1714 uint8_t halfHV[64];\
b3184779
MN
1715 copy_block9(full, src, 16, stride, 9);\
1716 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1717 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1718 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1719 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1720}\
0c1a9eda
ZK
1721static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1722 uint8_t full[16*9];\
1723 uint8_t halfH[72];\
db794953
MN
1724 copy_block9(full, src, 16, stride, 9);\
1725 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1726 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1727 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1728}\
0c1a9eda
ZK
1729void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1730 uint8_t full[16*9];\
1731 uint8_t halfH[72];\
1732 uint8_t halfV[64];\
1733 uint8_t halfHV[64];\
b3184779
MN
1734 copy_block9(full, src, 16, stride, 9);\
1735 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1736 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1737 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1738 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1739}\
0c1a9eda
ZK
1740static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1741 uint8_t full[16*9];\
1742 uint8_t halfH[72];\
db794953
MN
1743 copy_block9(full, src, 16, stride, 9);\
1744 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1745 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1746 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1747}\
0c1a9eda
ZK
1748static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1749 uint8_t halfH[72];\
b3184779 1750 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1751 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1752}\
0c1a9eda 1753static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1754 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1755}\
1756\
0c1a9eda
ZK
1757static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1758 uint8_t half[256];\
b3184779
MN
1759 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1760 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1761}\
1762\
0c1a9eda 1763static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1764 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1765}\
b3184779 1766\
0c1a9eda
ZK
1767static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1768 uint8_t half[256];\
b3184779
MN
1769 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1770 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1771}\
1772\
0c1a9eda
ZK
1773static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1774 uint8_t full[24*17];\
1775 uint8_t half[256];\
b3184779 1776 copy_block17(full, src, 24, stride, 17);\
826f429a 1777 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1778 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1779}\
1780\
0c1a9eda
ZK
1781static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1782 uint8_t full[24*17];\
b3184779 1783 copy_block17(full, src, 24, stride, 17);\
826f429a 1784 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1785}\
1786\
0c1a9eda
ZK
1787static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1788 uint8_t full[24*17];\
1789 uint8_t half[256];\
b3184779 1790 copy_block17(full, src, 24, stride, 17);\
826f429a 1791 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1792 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1793}\
0c1a9eda
ZK
1794void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1795 uint8_t full[24*17];\
1796 uint8_t halfH[272];\
1797 uint8_t halfV[256];\
1798 uint8_t halfHV[256];\
b3184779
MN
1799 copy_block17(full, src, 24, stride, 17);\
1800 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1801 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1802 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1803 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1804}\
0c1a9eda
ZK
1805static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1806 uint8_t full[24*17];\
1807 uint8_t halfH[272];\
1808 uint8_t halfHV[256];\
db794953
MN
1809 copy_block17(full, src, 24, stride, 17);\
1810 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1811 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1812 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1813 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1814}\
0c1a9eda
ZK
1815void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1816 uint8_t full[24*17];\
1817 uint8_t halfH[272];\
1818 uint8_t halfV[256];\
1819 uint8_t halfHV[256];\
b3184779
MN
1820 copy_block17(full, src, 24, stride, 17);\
1821 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1822 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1823 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1824 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1825}\
0c1a9eda
ZK
1826static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1827 uint8_t full[24*17];\
1828 uint8_t halfH[272];\
1829 uint8_t halfHV[256];\
db794953
MN
1830 copy_block17(full, src, 24, stride, 17);\
1831 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1832 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1833 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1834 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1835}\
0c1a9eda
ZK
1836void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1837 uint8_t full[24*17];\
1838 uint8_t halfH[272];\
1839 uint8_t halfV[256];\
1840 uint8_t halfHV[256];\
b3184779
MN
1841 copy_block17(full, src, 24, stride, 17);\
1842 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1843 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1844 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1845 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1846}\
0c1a9eda
ZK
1847static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1848 uint8_t full[24*17];\
1849 uint8_t halfH[272];\
1850 uint8_t halfHV[256];\
db794953
MN
1851 copy_block17(full, src, 24, stride, 17);\
1852 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1853 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1854 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1855 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1856}\
0c1a9eda
ZK
1857void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1858 uint8_t full[24*17];\
1859 uint8_t halfH[272];\
1860 uint8_t halfV[256];\
1861 uint8_t halfHV[256];\
b3184779
MN
1862 copy_block17(full, src, 24, stride, 17);\
1863 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1864 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1865 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1866 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1867}\
0c1a9eda
ZK
1868static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1869 uint8_t full[24*17];\
1870 uint8_t halfH[272];\
1871 uint8_t halfHV[256];\
db794953
MN
1872 copy_block17(full, src, 24, stride, 17);\
1873 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1874 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1875 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1876 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1877}\
0c1a9eda
ZK
1878static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1879 uint8_t halfH[272];\
1880 uint8_t halfHV[256];\
b3184779 1881 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1882 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1883 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1884}\
0c1a9eda
ZK
1885static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1886 uint8_t halfH[272];\
1887 uint8_t halfHV[256];\
b3184779 1888 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1889 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1890 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1891}\
0c1a9eda
ZK
1892void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1893 uint8_t full[24*17];\
1894 uint8_t halfH[272];\
1895 uint8_t halfV[256];\
1896 uint8_t halfHV[256];\
b3184779
MN
1897 copy_block17(full, src, 24, stride, 17);\
1898 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1899 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1900 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1901 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1902}\
0c1a9eda
ZK
1903static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1904 uint8_t full[24*17];\
1905 uint8_t halfH[272];\
db794953
MN
1906 copy_block17(full, src, 24, stride, 17);\
1907 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1908 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1909 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1910}\
0c1a9eda
ZK
1911void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1912 uint8_t full[24*17];\
1913 uint8_t halfH[272];\
1914 uint8_t halfV[256];\
1915 uint8_t halfHV[256];\
b3184779
MN
1916 copy_block17(full, src, 24, stride, 17);\
1917 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1918 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1919 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1920 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1921}\
0c1a9eda
ZK
1922static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1923 uint8_t full[24*17];\
1924 uint8_t halfH[272];\
db794953
MN
1925 copy_block17(full, src, 24, stride, 17);\
1926 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1927 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1928 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1929}\
0c1a9eda
ZK
1930static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1931 uint8_t halfH[272];\
b3184779 1932 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1933 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1934}
44eb4951 1935
b3184779
MN
1936#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1937#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1938#define op_put(a, b) a = cm[((b) + 16)>>5]
1939#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1940
1941QPEL_MC(0, put_ , _ , op_put)
1942QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1943QPEL_MC(0, avg_ , _ , op_avg)
1944//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1945#undef op_avg
1946#undef op_avg_no_rnd
1947#undef op_put
1948#undef op_put_no_rnd
44eb4951 1949
0da71265
MN
1950#if 1
/*
 * 6-tap (1, -5, 20, 20, -5, 1) interpolation kernels for N x N blocks.
 *
 * H264_LOWPASS_SIZE(OPNAME, OP, OP2, N) emits, for one block size N:
 *   <OPNAME>h264_qpel<N>_h_lowpass   horizontal pass, stored through OP
 *   <OPNAME>h264_qpel<N>_v_lowpass   vertical pass, stored through OP
 *   <OPNAME>h264_qpel<N>_hv_lowpass  horizontal pass into the int16_t
 *                                    scratch buffer tmp (no rounding or
 *                                    clamping), then a vertical pass over
 *                                    tmp stored through OP2
 *
 * OP and OP2 are store macros of the form OP(lvalue, sum); they refer to
 * the local pointer cm, so that name must not be changed here.
 *
 * The horizontal pass reads src[-2] .. src[N+2] on every row; the vertical
 * pass reads rows -2 .. N+2 of every column.  All loop bounds are
 * compile-time constants.
 */
#define H264_LOWPASS_SIZE(OPNAME, OP, OP2, N) \
static void OPNAME ## h264_qpel ## N ## _h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(y=0; y<N; y++){\
        for(x=0; x<N; x++){\
            OP(dst[x], (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]));\
        }\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel ## N ## _v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, k;\
    for(x=0; x<N; x++){\
        int col[N+5];\
        /* gather the whole column (rows -2 .. N+2) before storing anything */\
        for(k=0; k<N+5; k++){\
            col[k]= src[(k-2)*srcStride];\
        }\
        for(k=0; k<N; k++){\
            OP(dst[k*dstStride], (col[k+2]+col[k+3])*20 - (col[k+1]+col[k+4])*5 + (col[k]+col[k+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel ## N ## _hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, k;\
    /* first pass: N+5 horizontally filtered rows, starting two rows above */\
    src -= 2*srcStride;\
    for(k=0; k<N+5; k++){\
        for(x=0; x<N; x++){\
            tmp[x]= (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]);\
        }\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    /* rewind tmp to the row that corresponds to output row 0 */\
    tmp -= tmpStride*(N+5-2);\
    /* second pass: vertical filter over the intermediate rows */\
    for(x=0; x<N; x++){\
        int col[N+5];\
        for(k=0; k<N+5; k++){\
            col[k]= tmp[(k-2)*tmpStride];\
        }\
        for(k=0; k<N; k++){\
            OP2(dst[k*dstStride], (col[k+2]+col[k+3])*20 - (col[k+1]+col[k+4])*5 + (col[k]+col[k+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}

/*
 * H264_LOWPASS(OPNAME, OP, OP2) emits the 4x4 and 8x8 kernels above plus
 * 16x16 variants that run the 8x8 kernel on the four quadrants in the order
 * top-left, top-right, bottom-left, bottom-right.  The 16x16 hv variant
 * hands the same tmp / tmp+8 scratch area to the upper and the lower pair
 * of quadrants.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
H264_LOWPASS_SIZE(OPNAME, OP, OP2, 4)\
\
H264_LOWPASS_SIZE(OPNAME, OP, OP2, 8)\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst                , src                , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8              , src+8              , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8*dstStride    , src+8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8*dstStride + 8, src+8*srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst                , src                , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8              , src+8              , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8*dstStride    , src+8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8*dstStride + 8, src+8*srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8              , tmp+8, src+8              , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8*dstStride    , tmp  , src+8*srcStride    , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8*dstStride + 8, tmp+8, src+8*srcStride + 8, dstStride, tmpStride, srcStride);\
}
/*
 * H264_MC(OPNAME, SIZE) emits the sixteen functions
 * <OPNAME>h264_qpel<SIZE>_mcXY_c(dst, src, stride) for X, Y in 0..3.
 *
 * Building blocks used below:
 *   - put_h264_qpel<SIZE>_{h,v,hv}_lowpass write a filtered SIZE x SIZE
 *     block into a local buffer whose row pitch is SIZE;
 *   - the vertical filter is fed from a private SIZE x (SIZE+5) copy of the
 *     source that starts two rows above src, so "centre" (copy + 2 rows)
 *     lines up with src;
 *   - <OPNAME>pixels<SIZE>_l2 combines two such inputs into dst
 *     (presumably by averaging -- confirm against its definition).
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    /* no filtering: plain block transfer */\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hFilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src, SIZE, stride);\
    /* source combined with the horizontally filtered block */\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, hFilt, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    /* horizontal filter straight into dst */\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hFilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src, SIZE, stride);\
    /* as mc10, but paired with the source one pixel to the right */\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, hFilt, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    uint8_t vFilt[SIZE*SIZE];\
    copy_block ## SIZE (padded, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    /* source rows combined with the vertically filtered block */\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre, vFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    copy_block ## SIZE (padded, src - stride*2, SIZE, stride, SIZE + 5);\
    /* vertical filter straight into dst */\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, centre, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    uint8_t vFilt[SIZE*SIZE];\
    copy_block ## SIZE (padded, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    /* as mc01, but paired with the source one row further down */\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre+SIZE, vFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    uint8_t hFilt[SIZE*SIZE];\
    uint8_t vFilt[SIZE*SIZE];\
    /* vertical filter on the column at src */\
    copy_block ## SIZE (padded, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    /* horizontal filter on the row at src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hFilt, vFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    uint8_t hFilt[SIZE*SIZE];\
    uint8_t vFilt[SIZE*SIZE];\
    /* vertical filter on the column one pixel to the right of src */\
    copy_block ## SIZE (padded, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    /* horizontal filter on the row at src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hFilt, vFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    uint8_t hFilt[SIZE*SIZE];\
    uint8_t vFilt[SIZE*SIZE];\
    /* vertical filter on the column at src */\
    copy_block ## SIZE (padded, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    /* horizontal filter on the row below src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hFilt, vFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    uint8_t hFilt[SIZE*SIZE];\
    uint8_t vFilt[SIZE*SIZE];\
    /* vertical filter on the column one pixel to the right of src */\
    copy_block ## SIZE (padded, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    /* horizontal filter on the row below src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hFilt, vFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    /* two-dimensional filter straight into dst */\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, scratch, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t hFilt[SIZE*SIZE];\
    uint8_t hvFilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvFilt, scratch, src, SIZE, SIZE, stride);\
    /* horizontal filter on the row at src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hFilt, hvFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t hFilt[SIZE*SIZE];\
    uint8_t hvFilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvFilt, scratch, src, SIZE, SIZE, stride);\
    /* horizontal filter on the row below src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hFilt, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hFilt, hvFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t vFilt[SIZE*SIZE];\
    uint8_t hvFilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvFilt, scratch, src, SIZE, SIZE, stride);\
    /* vertical filter on the column at src */\
    copy_block ## SIZE (padded, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vFilt, hvFilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= padded + SIZE*2;\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t vFilt[SIZE*SIZE];\
    uint8_t hvFilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvFilt, scratch, src, SIZE, SIZE, stride);\
    /* vertical filter on the column one pixel to the right of src */\
    copy_block ## SIZE (padded, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vFilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vFilt, hvFilt, stride, SIZE, SIZE, SIZE);\
}
2290#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2291//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2292#define op_put(a, b) a = cm[((b) + 16)>>5]
2293#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2294#define op2_put(a, b) a = cm[((b) + 512)>>10]
2295
2296H264_LOWPASS(put_ , op_put, op2_put)
2297H264_LOWPASS(avg_ , op_avg, op2_avg)
2298H264_MC(put_, 4)
2299H264_MC(put_, 8)
2300H264_MC(put_, 16)
2301H264_MC(avg_, 4)
2302H264_MC(avg_, 8)
2303H264_MC(avg_, 16)
2304
2305#undef op_avg
2306#undef op_put
2307#undef op2_avg
2308#undef op2_put
2309#endif
2310
1457ab52
MN
2311static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2312 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2313 int i;
2314
2315 for(i=0; i<h; i++){
2316 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2317 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2318 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2319 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2320 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2321 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2322 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2323 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2324 dst+=dstStride;
2325 src+=srcStride;
2326 }
2327}
2328
2329static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2330 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2331 int i;
2332
2333 for(i=0; i<w; i++){
2334 const int src_1= src[ -srcStride];
2335 const int src0 = src[0 ];
2336 const int src1 = src[ srcStride];
2337 const int src2 = src[2*srcStride];
2338 const int src3 = src[3*srcStride];
2339 const int src4 = src[4*srcStride];
2340 const int src5 = src[5*srcStride];
2341 const int src6 = src[6*srcStride];
2342 const int src7 = src[7*srcStride];
2343 const int src8 = src[8*srcStride];
2344 const int src9 = src[9*srcStride];
2345 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2346 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2347 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2348 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2349 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2350 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2351 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2352 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2353 src++;
2354 dst++;
2355 }
2356}
2357
/** Unfiltered case: hands the 8x8 block to put_pixels8_c with a height of 8. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
2361
/**
 * Combines the 8x8 source block with its horizontally filtered version.
 * The filtered block is built in a local 8x8 buffer (row pitch 8) and then
 * merged with src by put_pixels8_l2.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hFilt[8*8];

    wmv2_mspel8_h_lowpass(hFilt, src, 8, stride, 8);
    put_pixels8_l2(dst, src, hFilt, stride, stride, 8, 8);
}
2367
/** Horizontal low-pass of 8 rows, written directly into dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2371
/**
 * Same as put_mspel8_mc10_c, except that the horizontally filtered block is
 * merged with the source shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hFilt[8*8];

    wmv2_mspel8_h_lowpass(hFilt, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, hFilt, stride, stride, 8, 8);
}
2377
/** Vertical low-pass of 8 columns, written directly into dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2381
/**
 * Merges the vertically filtered block with the horizontally-then-vertically
 * filtered block.
 *
 * The horizontal pass covers 11 rows starting one row above src, because the
 * vertical pass that follows reads rows -1 .. 9 relative to its input; that
 * input is therefore hRows+8, i.e. the second buffered row.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hRows[11*8];
    uint8_t vOnly[8*8];
    uint8_t hThenV[8*8];

    wmv2_mspel8_v_lowpass(vOnly, src, 8, stride, 8);

    wmv2_mspel8_h_lowpass(hRows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hThenV, hRows+8, 8, 8, 8);

    put_pixels8_l2(dst, vOnly, hThenV, stride, 8, 8, 8);
}
/**
 * mspel 8x8, mc32 case.
 * Same scheme as the mc12 case, except that the purely vertical
 * intermediate is taken one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows_h[11*8]; /* 11 filtered rows, starting one row above src */
    uint8_t blk_v[8*8];
    uint8_t blk_hv[8*8];

    wmv2_mspel8_h_lowpass(rows_h, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(blk_v, src+1, 8, stride, 8);
    /* rows_h+8 is the row that corresponds to src itself */
    wmv2_mspel8_v_lowpass(blk_hv, rows_h+8, 8, 8, 8);
    put_pixels8_l2(dst, blk_v, blk_hv, stride, 8, 8, 8);
}
/**
 * mspel 8x8, mc22 case: horizontal filter into a packed scratch buffer
 * (11 rows starting one row above src), then the vertical filter over that
 * buffer straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows_h[11*8];

    wmv2_mspel8_h_lowpass(rows_h, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, rows_h+8, stride, 8, 8);
}
2405
332f9ac4
MN
2406static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
2407 int x;
2408 const int strength= ff_h263_loop_filter_strength[qscale];
2409
2410 for(x=0; x<8; x++){
2411 int d1, d2, ad1;
2412 int p0= src[x-2*stride];
2413 int p1= src[x-1*stride];
2414 int p2= src[x+0*stride];
2415 int p3= src[x+1*stride];
2416 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2417
2418 if (d<-2*strength) d1= 0;
2419 else if(d<- strength) d1=-2*strength - d;
2420 else if(d< strength) d1= d;
2421 else if(d< 2*strength) d1= 2*strength - d;
2422 else d1= 0;
2423
2424 p1 += d1;
2425 p2 -= d1;
2426 if(p1&256) p1= ~(p1>>31);
2427 if(p2&256) p2= ~(p2>>31);
2428
2429 src[x-1*stride] = p1;
2430 src[x+0*stride] = p2;
2431
5b5404e3 2432 ad1= ABS(d1)>>1;
332f9ac4
MN
2433
2434 d2= clip((p0-p3)/4, -ad1, ad1);
2435
2436 src[x-2*stride] = p0 - d2;
2437 src[x+ stride] = p3 + d2;
2438 }
2439}
2440
2441static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
2442 int y;
2443 const int strength= ff_h263_loop_filter_strength[qscale];
2444
2445 for(y=0; y<8; y++){
2446 int d1, d2, ad1;
2447 int p0= src[y*stride-2];
2448 int p1= src[y*stride-1];
2449 int p2= src[y*stride+0];
2450 int p3= src[y*stride+1];
2451 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2452
2453 if (d<-2*strength) d1= 0;
2454 else if(d<- strength) d1=-2*strength - d;
2455 else if(d< strength) d1= d;
2456 else if(d< 2*strength) d1= 2*strength - d;
2457 else d1= 0;
2458
2459 p1 += d1;
2460 p2 -= d1;
2461 if(p1&256) p1= ~(p1>>31);
2462 if(p2&256) p2= ~(p2>>31);
2463
2464 src[y*stride-1] = p1;
2465 src[y*stride+0] = p2;
2466
2467 ad1= ABS(d1)>>1;
2468
2469 d2= clip((p0-p3)/4, -ad1, ad1);
2470
2471 src[y*stride-2] = p0 - d2;
2472 src[y*stride+1] = p3 + d2;
2473 }
2474}
1457ab52 2475
fdbbf2e0
MN
/**
 * In-place separable (1,2,1) smoothing of an 8x8 block.
 * The vertical pass leaves rows 0 and 7 unfiltered (only scaled by 4), the
 * horizontal pass leaves columns 0 and 7 unfiltered; everything else gets
 * the three-tap kernel in that direction.
 * @param src    top-left pixel of the block, modified in place
 * @param stride distance in bytes between two rows of src
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8*8]; /* result of the vertical pass, scaled by 4 */
    int row, col;

    for(row=0; row<8; row++){
        const uint8_t *const line= src + row*stride;
        int *const out= vert + row*8;

        for(col=0; col<8; col++){
            if(row==0 || row==7)
                out[col]= 4*line[col];
            else
                out[col]= line[col - stride] + 2*line[col] + line[col + stride];
        }
    }

    for(row=0; row<8; row++){
        uint8_t *const line= src + row*stride;
        const int *const in= vert + row*8;

        /* border columns: undo the factor 4 with rounding */
        line[0]= (in[0] + 2)>>2;
        line[7]= (in[7] + 2)>>2;
        /* inner columns: horizontal kernel, total scale 16 */
        for(col=1; col<7; col++)
            line[col]= (in[col-1] + 2*in[col] + in[col+1] + 8)>>4;
    }
}
2502
/**
 * Sum of absolute differences between two blocks that are 16 pixels wide
 * and h rows high.
 * @param v         unused
 * @param line_size distance in bytes between rows, same for both blocks
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2530
/**
 * Sum of absolute differences, 16 pixels wide and h rows high, between pix1
 * and the avg2() of each pix2 pixel with its right neighbour.
 * Reads 17 pixels per row of pix2.
 * @param v unused
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2558
/**
 * Sum of absolute differences, 16 pixels wide and h rows high, between pix1
 * and the avg2() of each pix2 pixel with the pixel one row below it.
 * Reads h+1 rows of pix2.
 * @param v unused
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        uint8_t *const below= pix2 + line_size;

        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2588
/**
 * Sum of absolute differences, 16 pixels wide and h rows high, between pix1
 * and the avg4() of each 2x2 neighbourhood of pix2 (pixel, right, below,
 * below-right). Reads 17 pixels per row and h+1 rows of pix2.
 * @param v unused
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        uint8_t *const below= pix2 + line_size;

        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2618
/**
 * Sum of absolute differences between two blocks that are 8 pixels wide
 * and h rows high.
 * @param v         unused
 * @param line_size distance in bytes between rows, same for both blocks
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2638
/**
 * Sum of absolute differences, 8 pixels wide and h rows high, between pix1
 * and the avg2() of each pix2 pixel with its right neighbour.
 * Reads 9 pixels per row of pix2.
 * @param v unused
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2658
/**
 * Sum of absolute differences, 8 pixels wide and h rows high, between pix1
 * and the avg2() of each pix2 pixel with the pixel one row below it.
 * Reads h+1 rows of pix2.
 * @param v unused
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        uint8_t *const below= pix2 + line_size;

        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2680
/**
 * Sum of absolute differences, 8 pixels wide and h rows high, between pix1
 * and the avg4() of each 2x2 neighbourhood of pix2 (pixel, right, below,
 * below-right). Reads 9 pixels per row and h+1 rows of pix2.
 * @param v unused
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        uint8_t *const below= pix2 + line_size;

        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2  = below;
    }
    return sad;
}
2702
d4c5d2ad 2703static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
e6a2ac34
MN
2704 int score1=0;
2705 int score2=0;
2706 int x,y;
d4c5d2ad 2707
e6a2ac34
MN
2708 for(y=0; y<h; y++){
2709 for(x=0; x<16; x++){
2710 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
2711 }
2712 if(y+1<h){
2713 for(x=0; x<15; x++){
2714 score2+= ABS( s1[x ] - s1[x +stride]
2715 - s1[x+1] + s1[x+1+stride])
2716 -ABS( s2[x ] - s2[x +stride]
2717 - s2[x+1] + s2[x+1+stride]);
2718 }
2719 }
2720 s1+= stride;
2721 s2+= stride;
2722 }
d4c5d2ad
MN
2723
2724 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
2725 else return score1 + ABS(score2)*8;
e6a2ac34
MN
2726}
2727
d4c5d2ad 2728static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
e6a2ac34
MN
2729 int score1=0;
2730 int score2=0;
2731 int x,y;
2732
2733 for(y=0; y<h; y++){
2734 for(x=0; x<8; x++){
2735 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
2736 }
2737 if(y+1<h){
2738 for(x=0; x<7; x++){
2739 score2+= ABS( s1[x ] - s1[x +stride]
2740 - s1[x+1] + s1[x+1+stride])
2741 -ABS( s2[x ] - s2[x +stride]
2742 - s2[x+1] + s2[x+1+stride]);
2743 }
2744 }
2745 s1+= stride;
2746 s2+= stride;
2747 }
2748
d4c5d2ad
MN
2749 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
2750 else return score1 + ABS(score2)*8;
e6a2ac34
MN
2751}
2752
364a1797
MN
2753static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
2754 int i;
2755 unsigned int sum=0;
2756
2757 for(i=0; i<8*8; i++){
2758 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
2759 int w= weight[i];
2760 b>>= RECON_SHIFT;
2761 assert(-512<b && b<512);
2762
2763 sum += (w*b)*(w*b)>>4;
2764 }
2765 return sum>>2;
2766}
2767
2768static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
2769 int i;
2770
2771 for(i=0; i<8*8; i++){
2772 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
2773 }
2774}
2775
a9badb51
MN
2776/**
2777 * permutes an 8x8 block.
2a5700de 2778 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
2779 * @param permutation the permutation vector
2780 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
2a5700de
MN
2781 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2782 * (inverse) permutated to scantable order!
a9badb51 2783 */
0c1a9eda 2784void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 2785{
7801d21d 2786 int i;
477ab036 2787 DCTELEM temp[64];
7801d21d
MN
2788
2789 if(last<=0) return;
9a7b310d 2790 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 2791
7801d21d
MN
2792 for(i=0; i<=last; i++){
2793 const int j= scantable[i];
2794 temp[j]= block[j];
2795 block[j]=0;
2796 }
2797
2798 for(i=0; i<=last; i++){
2799 const int j= scantable[i];
2800 const int perm_j= permutation[j];
2801 block[perm_j]= temp[j];
2802 }
d962f6fd 2803}
e0eac44e 2804
622348f9
MN
/**
 * Comparison function that looks at none of its arguments and always
 * returns 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
2808
2809void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
2810 int i;
2811
2812 memset(cmp, 0, sizeof(void*)*5);
2813
2814 for(i=0; i<5; i++){
2815 switch(type&0xFF){
2816 case FF_CMP_SAD:
2817 cmp[i]= c->sad[i];
2818 break;
2819 case FF_CMP_SATD:
2820 cmp[i]= c->hadamard8_diff[i];
2821 break;
2822 case FF_CMP_SSE:
2823 cmp[i]= c->sse[i];
2824 break;
2825 case FF_CMP_DCT:
2826 cmp[i]= c->dct_sad[i];
2827 break;
2828 case FF_CMP_PSNR:
2829 cmp[i]= c->quant_psnr[i];
2830 break;
2831 case FF_CMP_BIT:
2832 cmp[i]= c->bit[i];
2833 break;
2834 case FF_CMP_RD:
2835 cmp[i]= c->rd[i];
2836 break;
2837 case FF_CMP_VSAD:
2838 cmp[i]= c->vsad[i];
2839 break;
2840 case FF_CMP_VSSE:
2841 cmp[i]= c->vsse[i];
2842 break;
2843 case FF_CMP_ZERO:
2844 cmp[i]= zero_cmp;
2845 break;
e6a2ac34
MN
2846 case FF_CMP_NSSE:
2847 cmp[i]= c->nsse[i];
2848 break;
26efc54e
MN
2849 case FF_CMP_W53:
2850 cmp[i]= c->w53[i];
2851 break;
2852 case FF_CMP_W97:
2853 cmp[i]= c->w97[i];
2854 break;
622348f9
MN
2855 default:
2856 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
2857 }
2858 }
2859}
2860
2a5700de
MN
2861/**
2862 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
2863 */
eb4b3dd3 2864static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
2865{
2866 memset(blocks, 0, sizeof(DCTELEM)*6*64);
2867}
2868
11f18faf
MN
/**
 * Adds src[] to dst[] byte by byte, for w bytes; each sum is stored back
 * into a uint8_t and therefore wraps modulo 256.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int pos;

    for(pos=0; pos<w; pos++)
        dst[pos] += src[pos];
}
2884
/**
 * Stores src1[] - src2[] into dst[] byte by byte, for w bytes; each
 * difference is stored into a uint8_t and therefore wraps modulo 256.
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int pos;

    for(pos=0; pos<w; pos++)
        dst[pos]= src1[pos] - src2[pos];
}
2900
84705403
MN
/**
 * Writes to dst[] the difference between src2[] and a prediction.
 * The prediction is mid_pred() of the previous src2 value (left), the
 * src1 value at the same position, and (left + src1[i] - previous src1
 * value) masked to 8 bits.
 * @param left     in: initial left value, out: last src2 value read
 * @param left_top in: initial top-left value, out: last src1 value read
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur_left= *left;
    uint8_t cur_lt  = *left_top;
    int i;

    for(i=0; i<w; i++){
        const uint8_t top= src1[i];
        const int pred= mid_pred(cur_left, top, (cur_left + top - cur_lt)&0xFF);

        cur_lt  = top;
        cur_left= src2[i];
        dst[i]= cur_left - pred;
    }

    *left    = cur_left;
    *left_top= cur_lt;
}
2918
1457ab52
MN
/* o1 receives i1+i2 and o2 receives i1-i2; expands to two statements */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* in place variant: x becomes x+y and y becomes x-y, both computed from
   the values x and y had before */
#define BUTTERFLY1(x,y) \
{\
    int bf_a,bf_b;\
    bf_a= x;\
    bf_b= y;\
    x= bf_a+bf_b;\
    y= bf_a-bf_b;\
}

/* |x+y| + |x-y|, nothing is stored */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
2933
/**
 * Runs three butterfly stages along the rows and three along the columns
 * of the 8x8 difference src - dst and returns the sum of the absolute
 * values of the result.
 * @param s unused
 * @param h must be 8
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum= 0;
    int i;

    assert(h==8);

    /* rows: the first stage also forms the pixel differences */
    for(i=0; i<8; i++){
        int *const t= temp + 8*i;
        const uint8_t *const sp= src + stride*i;
        const uint8_t *const dp= dst + stride*i;

        BUTTERFLY2(t[0], t[1], sp[0]-dp[0], sp[1]-dp[1]);
        BUTTERFLY2(t[2], t[3], sp[2]-dp[2], sp[3]-dp[3]);
        BUTTERFLY2(t[4], t[5], sp[4]-dp[4], sp[5]-dp[5]);
        BUTTERFLY2(t[6], t[7], sp[6]-dp[6], sp[7]-dp[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* columns: the last stage is folded into the absolute sum */
    for(i=0; i<8; i++){
        int *const t= temp + i;

        BUTTERFLY1(t[8*0], t[8*1]);
        BUTTERFLY1(t[8*2], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*5]);
        BUTTERFLY1(t[8*6], t[8*7]);

        BUTTERFLY1(t[8*0], t[8*2]);
        BUTTERFLY1(t[8*1], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*6]);
        BUTTERFLY1(t[8*5], t[8*7]);

        sum += BUTTERFLYA(t[8*0], t[8*4])
              +BUTTERFLYA(t[8*1], t[8*5])
              +BUTTERFLYA(t[8*2], t[8*6])
              +BUTTERFLYA(t[8*3], t[8*7]);
    }
    return sum;
}
2985
/**
 * Runs three butterfly stages along the rows and three along the columns
 * of the 8x8 block at src and returns the sum of the absolute values of
 * the result, with the |temp[0] + temp[32]| term taken out again.
 * @param s     unused
 * @param dummy unused
 * @param h     must be 8
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum= 0;
    int i;

    assert(h==8);

    /* rows */
    for(i=0; i<8; i++){
        int *const t= temp + 8*i;
        const uint8_t *const sp= src + stride*i;

        BUTTERFLY2(t[0], t[1], sp[0], sp[1]);
        BUTTERFLY2(t[2], t[3], sp[2], sp[3]);
        BUTTERFLY2(t[4], t[5], sp[4], sp[5]);
        BUTTERFLY2(t[6], t[7], sp[6], sp[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* columns: the last stage is folded into the absolute sum */
    for(i=0; i<8; i++){
        int *const t= temp + i;

        BUTTERFLY1(t[8*0], t[8*1]);
        BUTTERFLY1(t[8*2], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*5]);
        BUTTERFLY1(t[8*6], t[8*7]);

        BUTTERFLY1(t[8*0], t[8*2]);
        BUTTERFLY1(t[8*1], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*6]);
        BUTTERFLY1(t[8*5], t[8*7]);

        sum += BUTTERFLYA(t[8*0], t[8*4])
              +BUTTERFLYA(t[8*1], t[8*5])
              +BUTTERFLYA(t[8*2], t[8*6])
              +BUTTERFLYA(t[8*3], t[8*7]);
    }

    sum -= ABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3033
bb198e19 3034static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 3035 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
3036 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3037 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1457ab52 3038 int sum=0, i;
bb198e19
MN
3039
3040 assert(h==8);
1457ab52
MN
3041
3042 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 3043 s->dsp.fdct(temp);
1457ab52
MN
3044
3045 for(i=0; i<64; i++)
3046 sum+= ABS(temp[i]);
3047
3048 return sum;
3049}
3050
0e15384d 3051void simple_idct(DCTELEM *block); //FIXME
1457ab52 3052
bb198e19 3053static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 3054 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
3055 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
3056 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3057 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
1457ab52
MN
3058 int sum=0, i;
3059
bb198e19 3060 assert(h==8);
1457ab52
MN
3061 s->mb_intra=0;
3062
3063 s->dsp.diff_pixels(temp, src1, src2, stride);
3064
3065 memcpy(bak, temp, 64*sizeof(DCTELEM));
3066
67725183 3067 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
d50635cd 3068 s->dct_unquantize_inter(s, temp, 0, s->qscale);
1457ab52
MN
3069 simple_idct(temp); //FIXME
3070
3071 for(i=0; i<64; i++)
3072 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
3073
3074 return sum;
3075}
3076
bb198e19 3077static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 3078 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 3079 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
3080 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3081 uint64_t __align8 aligned_bak[stride];
3082 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3083 uint8_t * const bak= (uint8_t*)aligned_bak;
3a87ac94
MN
3084 int i, last, run, bits, level, distoration, start_i;
3085 const int esc_length= s->ac_esc_length;
3086 uint8_t * length;
3087 uint8_t * last_length;
67725183 3088
bb198e19
MN
3089 assert(h==8);
3090
67725183
MN
3091 for(i=0; i<8; i++){
3092 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
3093 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
3094 }
3a87ac94 3095
67725183
MN
3096 s->dsp.diff_pixels(temp, src1, src2, stride);
3097
3098 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3099
3100 bits=0;
3a87ac94
MN
3101
3102 if (s->mb_intra) {
67725183 3103 start_i = 1;
3a87ac94
MN
3104 length = s->intra_ac_vlc_length;
3105 last_length= s->intra_ac_vlc_last_length;
67725183 3106 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
3107 } else {
3108 start_i = 0;
3109 length = s->inter_ac_vlc_length;
3110 last_length= s->inter_ac_vlc_last_length;
3111 }
3a87ac94 3112
67725183 3113 if(last>=start_i){
3a87ac94
MN
3114 run=0;
3115 for(i=start_i; i<last; i++){
3116 int j= scantable[i];
3117 level= temp[j];
3118
3119 if(level){
3120 level+=64;
3121 if((level&(~127)) == 0){
3122 bits+= length[UNI_AC_ENC_INDEX(run, level)];
3123 }else
3124 bits+= esc_length;
3125 run=0;
3126 }else
3127 run++;
3128 }
3129 i= scantable[last];
1d0eab1d 3130
3a87ac94 3131 level= temp[i] + 64;
1d0eab1d
MN
3132
3133 assert(level - 64);
3134
3a87ac94
MN
3135 if((level&(~127)) == 0){
3136 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3137 }else
3138 bits+= esc_length;
3139
67725183
MN
3140 }
3141
3142 if(last>=0){
d50635cd
MN
3143 if(s->mb_intra)
3144 s->dct_unquantize_intra(s, temp, 0, s->qscale);
3145 else
3146 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3a87ac94
MN
3147 }
3148
b0368839 3149 s->dsp.idct_add(bak, stride, temp);
3a87ac94 3150
bb198e19 3151 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
3a87ac94 3152
67725183 3153 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
3154}
3155
bb198e19 3156static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 3157 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 3158 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
3159 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3160 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3a87ac94
MN
3161 int i, last, run, bits, level, start_i;
3162 const int esc_length= s->ac_esc_length;
3163 uint8_t * length;
3164 uint8_t * last_length;
bb198e19
MN
3165
3166 assert(h==8);
67725183
MN
3167
3168 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 3169
67725183
MN
3170 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3171
3172 bits=0;
3a87ac94
MN
3173
3174 if (s->mb_intra) {
67725183 3175 start_i = 1;
3a87ac94
MN
3176 length = s->intra_ac_vlc_length;
3177 last_length= s->intra_ac_vlc_last_length;
67725183 3178 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
3179 } else {
3180 start_i = 0;
3181 length = s->inter_ac_vlc_length;
3182 last_length= s->inter_ac_vlc_last_length;
3183 }
3a87ac94 3184
67725183 3185 if(last>=start_i){
3a87ac94
MN
3186 run=0;
3187 for(i=start_i; i<last; i++){
3188 int j= scantable[i];
3189 level= temp[j];
3190
3191 if(level){
3192 level+=64;
3193 if((level&(~127)) == 0){
3194 bits+= length[UNI_AC_ENC_INDEX(run, level)];
3195 }else
3196 bits+= esc_length;
3197 run=0;
3198 }else
3199 run++;
3200 }
3201 i= scantable[last];
67725183
MN
3202
3203 level= temp[i] + 64;
3a87ac94 3204
67725183 3205 assert(level - 64);
3a87ac94 3206
3a87ac94
MN
3207 if((level&(~127)) == 0){
3208 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3209 }else
3210 bits+= esc_length;
3211 }
3212
3213 return bits;
3214}
3215
622348f9
MN
/**
 * Sum of absolute differences between each pixel and the pixel directly
 * below it, over a block 16 pixels wide; h-1 row pairs are visited.
 * @param c     unused
 * @param dummy unused
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *const below= s + stride;

        for(col=0; col<16; col++)
            score += ABS(s[col] - below[col]);
        s= below;
    }

    return score;
}
3230
/**
 * Sum of absolute vertical differences of the error s1 - s2: for every
 * pixel the error in its row minus the error one row below is accumulated,
 * over a block 16 pixels wide; h-1 row pairs are visited.
 * @param c unused
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *const n1= s1 + stride;
        uint8_t *const n2= s2 + stride;

        for(col=0; col<16; col++)
            score += ABS(s1[col] - s2[col] - n1[col] + n2[col]);
        s1= n1;
        s2= n2;
    }

    return score;
}
3245
/* square of a; a is evaluated twice */
#define SQ(a) ((a)*(a))

/**
 * Sum of squared differences between each pixel and the pixel directly
 * below it, over a block 16 pixels wide; h-1 row pairs are visited.
 * @param c     unused
 * @param dummy unused
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *const below= s + stride;

        for(col=0; col<16; col++)
            score += SQ(s[col] - below[col]);
        s= below;
    }

    return score;
}
3261
/**
 * Sum of squared vertical differences of the error s1 - s2: for every
 * pixel the error in its row minus the error one row below is squared and
 * accumulated, over a block 16 pixels wide; h-1 row pairs are visited.
 * @param c unused
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *const n1= s1 + stride;
        uint8_t *const n2= s2 + stride;

        for(col=0; col<16; col++){
            const int d= s1[col] - s2[col] - n1[col] + n2[col];
            score += d*d;
        }
        s1= n1;
        s2= n2;
    }

    return score;
}
3276
/* NOTE(review): WARPER8_16_SQ is defined outside this section. Judging by the
 * argument names, each invocation presumably defines the 16x16 function named
 * by the second argument on top of the 8x8 function named by the first one --
 * confirm against the macro definition. */
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WARPER8_16_SQ(rd8x8_c, rd16_c)
WARPER8_16_SQ(bit8x8_c, bit16_c)
1457ab52 3283
b0368839
MN
3284/* XXX: those functions should be suppressed ASAP when all IDCTs are
3285 converted */
3286static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
3287{
3288 j_rev_dct (block);
3289 put_pixels_clamped_c(block, dest, line_size);
3290}
3291static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
3292{
3293 j_rev_dct (block);
3294 add_pixels_clamped_c(block, dest, line_size);
3295}
3296
59cf08ce
FB
3297/* init static data */
3298void dsputil_static_init(void)
e0eac44e 3299{
d2975f8d 3300 int i;
e0eac44e 3301
59cf08ce
FB
3302 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
3303 for(i=0;i<MAX_NEG_CROP;i++) {
3304 cropTbl[i] = 0;
3305 cropTbl[i + MAX_NEG_CROP + 256] = 255;
3306 }
3307
3308 for(i=0;i<512;i++) {
3309 squareTbl[i] = (i - 256) * (i - 256);
3310 }
3311
3312 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
3313}
92ddb692 3314
92ddb692 3315
59cf08ce
FB
3316void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3317{
3318 int i;
de6d9b64 3319
b0368839 3320#ifdef CONFIG_ENCODERS
10acc479 3321 if(avctx->dct_algo==FF_DCT_FASTINT) {
b0368839 3322 c->fdct = fdct_ifast;
48b1f800 3323 c->fdct248 = fdct_ifast248;
10acc479
RS
3324 }
3325 else if(avctx->dct_algo==FF_DCT_FAAN) {
65e4c8c9 3326 c->fdct = ff_faandct;
48b1f800 3327 c->fdct248 = ff_faandct248;
10acc479
RS
3328 }
3329 else {
b0368839 3330 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
10acc479
RS
3331 c->fdct248 = ff_fdct248_islow;
3332 }
b0368839
MN
3333#endif //CONFIG_ENCODERS
3334
3335 if(avctx->idct_algo==FF_IDCT_INT){
3336 c->idct_put= ff_jref_idct_put;
3337 c->idct_add= ff_jref_idct_add;
4fb518c3 3338 c->idct = j_rev_dct;
b0368839
MN
3339 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
3340 }else{ //accurate/default
3341 c->idct_put= simple_idct_put;
3342 c->idct_add= simple_idct_add;
4fb518c3 3343 c->idct = simple_idct;
b0368839
MN
3344 c->idct_permutation_type= FF_NO_IDCT_PERM;
3345 }
3346
44cb64ee
MM
3347 /* VP3 DSP support */
3348 c->vp3_dsp_init = vp3_dsp_init_c;
116824d0 3349 c->vp3_idct = vp3_idct_c;
44cb64ee 3350
eb4b3dd3
ZK
3351 c->get_pixels = get_pixels_c;
3352 c->diff_pixels = diff_pixels_c;
3353 c->put_pixels_clamped = put_pixels_clamped_c;
f9ed9d85 3354 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
eb4b3dd3
ZK
3355 c->add_pixels_clamped = add_pixels_clamped_c;
3356 c->gmc1 = gmc1_c;
3357 c->gmc = gmc_c;
3358 c->clear_blocks = clear_blocks_c;
3359 c->pix_sum = pix_sum_c;
3360 c->pix_norm1 = pix_norm1_c;
3361
45553457 3362 /* TODO [0] 16 [1] 8 */
bb198e19
MN
3363 c->pix_abs[0][0] = pix_abs16_c;
3364 c->pix_abs[0][1] = pix_abs16_x2_c;
3365 c->pix_abs[0][2] = pix_abs16_y2_c;
3366 c->pix_abs[0][3] = pix_abs16_xy2_c;
3367 c->pix_abs[1][0] = pix_abs8_c;
3368 c->pix_abs[1][1] = pix_abs8_x2_c;
3369 c->pix_abs[1][2] = pix_abs8_y2_c;
3370 c->pix_abs[1][3] = pix_abs8_xy2_c;
eb4b3dd3 3371
45553457
ZK
3372#define dspfunc(PFX, IDX, NUM) \
3373 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
3374 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
3375 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
3376 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
3377
3378 dspfunc(put, 0, 16);
3379 dspfunc(put_no_rnd, 0, 16);
3380 dspfunc(put, 1, 8);
3381 dspfunc(put_no_rnd, 1, 8);
669ac79c
MN
3382 dspfunc(put, 2, 4);
3383 dspfunc(put, 3, 2);
45553457
ZK
3384
3385 dspfunc(avg, 0, 16);
3386 dspfunc(avg_no_rnd, 0, 16);
3387 dspfunc(avg, 1, 8);
3388 dspfunc(avg_no_rnd, 1, 8);
da3b9756
MM
3389 dspfunc(avg, 2, 4);
3390 dspfunc(avg, 3, 2);
45553457
ZK
3391#undef dspfunc
3392
c0a0170c
MN
3393 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
3394 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
3395
669ac79c
MN
3396 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
3397 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
3398 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
3399 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
3400 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
3401 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
3402 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
3403 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
3404 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
3405
da3b9756
MM
3406 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
3407 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
3408 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
3409 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
3410 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
3411 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
3412 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
3413 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
3414 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
3415
45553457
ZK
3416#define dspfunc(PFX, IDX, NUM) \
3417 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
3418 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
3419 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
3420 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
3421 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
3422 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
3423 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
3424 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
3425 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
3426 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
3427 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
3428 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
3429 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
3430 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
3431 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
3432 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
3433
3434 dspfunc(put_qpel, 0, 16);
3435 dspfunc(put_no_rnd_qpel, 0, 16);
3436
3437 dspfunc(avg_qpel, 0, 16);
3438 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
3439
3440 dspfunc(put_qpel, 1, 8);
3441 dspfunc(put_no_rnd_qpel, 1, 8);
3442
3443 dspfunc(avg_qpel, 1, 8);
3444 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265
MN
3445
3446 dspfunc(put_h264_qpel, 0, 16);
3447 dspfunc(put_h264_qpel, 1, 8);
3448 dspfunc(put_h264_qpel, 2, 4);
3449 dspfunc(avg_h264_qpel, 0, 16);
3450 dspfunc(avg_h264_qpel, 1, 8);
3451 dspfunc(avg_h264_qpel, 2, 4);
3452
45553457 3453#undef dspfunc
0da71265
MN
3454 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
3455 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
3456 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
3457 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
3458 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
3459 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
c9a2ebc4 3460
1457ab52
MN
3461 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
3462 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
3463 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
3464 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
3465 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
3466 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
3467 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3468 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
669ac79c 3469
bb198e19
MN
3470#define SET_CMP_FUNC(name) \
3471 c->name[0]= name ## 16_c;\
3472 c->name[1]= name ## 8x8_c;
3473
3474 SET_CMP_FUNC(hadamard8_diff)
622348f9 3475 c->hadamard8_diff[4]= hadamard8_intra16_c;
bb198e19
MN
3476 SET_CMP_FUNC(dct_sad)
3477 c->sad[0]= pix_abs16_c;
3478 c->sad[1]= pix_abs8_c;
3479 c->sse[0]= sse16_c;
3480 c->sse[1]= sse8_c;
26efc54e 3481 c->sse[2]= sse4_c;
bb198e19
MN
3482 SET_CMP_FUNC(quant_psnr)
3483 SET_CMP_FUNC(rd)
3484 SET_CMP_FUNC(bit)
622348f9
MN
3485 c->vsad[0]= vsad16_c;
3486 c->vsad[4]= vsad_intra16_c;
3487 c->vsse[0]= vsse16_c;
3488 c->vsse[4]= vsse_intra16_c;
e6a2ac34
MN
3489 c->nsse[0]= nsse16_c;
3490 c->nsse[1]= nsse8_c;
26efc54e
MN
3491 c->w53[0]= w53_16_c;
3492 c->w53[1]= w53_8_c;
3493 c->w97[0]= w97_16_c;
3494 c->w97[1]= w97_8_c;
3495
11f18faf
MN
3496 c->add_bytes= add_bytes_c;
3497 c->diff_bytes= diff_bytes_c;
84705403 3498 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
3d2e8cce 3499 c->bswap_buf= bswap_buf;
332f9ac4
MN
3500
3501 c->h263_h_loop_filter= h263_h_loop_filter_c;
3502 c->h263_v_loop_filter= h263_v_loop_filter_c;
364a1797 3503
fdbbf2e0 3504 c->h261_loop_filter= h261_loop_filter_c;
c6148de2 3505
364a1797
MN
3506 c->try_8x8basis= try_8x8basis_c;
3507 c->add_8x8basis= add_8x8basis_c;
11f18faf 3508
980fc7b8 3509#ifdef HAVE_MMX
b0368839 3510 dsputil_init_mmx(c, avctx);
de6d9b64 3511#endif
3d03c0a2 3512#ifdef ARCH_ARMV4L
b0368839 3513 dsputil_init_armv4l(c, avctx);
3d03c0a2 3514#endif
c34270f5 3515#ifdef HAVE_MLIB
b0368839 3516 dsputil_init_mlib(c, avctx);
c34270f5 3517#endif
44f54ceb
MN
3518#ifdef ARCH_SPARC
3519 dsputil_init_vis(c,avctx);
3520#endif
1e98dffb 3521#ifdef ARCH_ALPHA
b0368839 3522 dsputil_init_alpha(c, avctx);
1e98dffb 3523#endif
59925ef2 3524#ifdef ARCH_POWERPC
b0368839 3525 dsputil_init_ppc(c, avctx);
a43bd1d7 3526#endif
d46aba26 3527#ifdef HAVE_MMI
b0368839 3528 dsputil_init_mmi(c, avctx);
d46aba26 3529#endif
0c6bd2ea
B
3530#ifdef ARCH_SH4
3531 dsputil_init_sh4(c,avctx);
3532#endif
43f1708f 3533
b0368839
MN
3534 switch(c->idct_permutation_type){
3535 case FF_NO_IDCT_PERM:
3536 for(i=0; i<64; i++)
3537 c->idct_permutation[i]= i;
3538 break;
3539 case FF_LIBMPEG2_IDCT_PERM:
3540 for(i=0; i<64; i++)
3541 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
3542 break;
3543 case FF_SIMPLE_IDCT_PERM:
3544 for(i=0; i<64; i++)
3545 c->idct_permutation[i]= simple_mmx_permutation[i];
3546 break;
3547 case FF_TRANSPOSE_IDCT_PERM:
3548 for(i=0; i<64; i++)
3549 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
3550 break;
3551 default:
9b879566 3552 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
b0368839 3553 }
57060b1e 3554}
b0368839 3555