1/2 resolution decoding
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 */
983e3246
MN
22
/**
 * @file dsputil.c
 * DSP utils
 */
27
de6d9b64
FB
28#include "avcodec.h"
29#include "dsputil.h"
1457ab52 30#include "mpegvideo.h"
b0368839 31#include "simple_idct.h"
65e4c8c9 32#include "faandct.h"
5596c60c 33
8b69867f
MN
34uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
35uint32_t squareTbl[512] = {0, };
de6d9b64 36
/* Zigzag scan order for an 8x8 block: entry i is the raster index
 * (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
47
10acc479
RS
/* Specific zigzag scan for the 248 IDCT.  NOTE that, unlike the
   specification, we interleave the fields.  Entry i is the raster index
   (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
60
2f349de2 61/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
8b69867f 62uint16_t __align8 inv_zigzag_direct16[64] = {0, };
2f349de2 63
/* Alternate (horizontally biased) scan order for an 8x8 block: entry i is
 * the raster index (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,   1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
74
/* Alternate (vertically biased) scan order for an 8x8 block: entry i is
 * the raster index (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,   8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
85
/* Fixed-point reciprocals:
 *     (a * inverse[b]) >> 32 == a / b   for all 0 <= a <= 65536 && 2 <= b <= 255
 * The product must be evaluated in 64 bits.  Entries 0 and 1 are outside the
 * range covered by that guarantee. */
const uint32_t inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
121
b0368839
MN
/* Input permutation for the simple_idct_mmx: a reordering of the 64
 * coefficient indices 0x00..0x3F, each appearing exactly once. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
133
/**
 * Sum all pixel values of a 16x16 block.
 *
 * @param pix       pointer to the top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
155
0c1a9eda 156static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
157{
158 int s, i, j;
0c1a9eda 159 uint32_t *sq = squareTbl + 256;
3aa102be
MN
160
161 s = 0;
162 for (i = 0; i < 16; i++) {
163 for (j = 0; j < 16; j += 8) {
2a006cd3 164#if 0
3aa102be
MN
165 s += sq[pix[0]];
166 s += sq[pix[1]];
167 s += sq[pix[2]];
168 s += sq[pix[3]];
169 s += sq[pix[4]];
170 s += sq[pix[5]];
171 s += sq[pix[6]];
172 s += sq[pix[7]];
2a006cd3
FL
173#else
174#if LONG_MAX > 2147483647
175 register uint64_t x=*(uint64_t*)pix;
176 s += sq[x&0xff];
177 s += sq[(x>>8)&0xff];
178 s += sq[(x>>16)&0xff];
179 s += sq[(x>>24)&0xff];
180 s += sq[(x>>32)&0xff];
181 s += sq[(x>>40)&0xff];
182 s += sq[(x>>48)&0xff];
183 s += sq[(x>>56)&0xff];
184#else
185 register uint32_t x=*(uint32_t*)pix;
186 s += sq[x&0xff];
187 s += sq[(x>>8)&0xff];
188 s += sq[(x>>16)&0xff];
189 s += sq[(x>>24)&0xff];
190 x=*(uint32_t*)(pix+4);
191 s += sq[x&0xff];
192 s += sq[(x>>8)&0xff];
193 s += sq[(x>>16)&0xff];
194 s += sq[(x>>24)&0xff];
195#endif
196#endif
3aa102be
MN
197 pix += 8;
198 }
199 pix += line_size - 16;
200 }
201 return s;
202}
203
3d2e8cce
MN
/**
 * Apply bswap_32() to each 32-bit word of a buffer.
 *
 * @param dst destination for the w converted words
 * @param src source words
 * @param w   number of 32-bit words to convert; nothing is written if w <= 0
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int i;

    for (i = 0; i < w; i++)
        dst[i] = bswap_32(src[i]);
}
3aa102be 221
26efc54e
MN
222static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
223{
224 int s, i;
225 uint32_t *sq = squareTbl + 256;
226
227 s = 0;
228 for (i = 0; i < h; i++) {
229 s += sq[pix1[0] - pix2[0]];
230 s += sq[pix1[1] - pix2[1]];
231 s += sq[pix1[2] - pix2[2]];
232 s += sq[pix1[3] - pix2[3]];
233 pix1 += line_size;
234 pix2 += line_size;
235 }
236 return s;
237}
238
bb198e19 239static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
240{
241 int s, i;
0c1a9eda 242 uint32_t *sq = squareTbl + 256;
1457ab52
MN
243
244 s = 0;
bb198e19 245 for (i = 0; i < h; i++) {
1457ab52
MN
246 s += sq[pix1[0] - pix2[0]];
247 s += sq[pix1[1] - pix2[1]];
248 s += sq[pix1[2] - pix2[2]];
249 s += sq[pix1[3] - pix2[3]];
250 s += sq[pix1[4] - pix2[4]];
251 s += sq[pix1[5] - pix2[5]];
252 s += sq[pix1[6] - pix2[6]];
253 s += sq[pix1[7] - pix2[7]];
254 pix1 += line_size;
255 pix2 += line_size;
256 }
257 return s;
258}
259
bb198e19 260static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 261{
6b026927
FH
262 int s, i;
263 uint32_t *sq = squareTbl + 256;
9c76bd48
BF
264
265 s = 0;
bb198e19 266 for (i = 0; i < h; i++) {
6b026927
FH
267 s += sq[pix1[ 0] - pix2[ 0]];
268 s += sq[pix1[ 1] - pix2[ 1]];
269 s += sq[pix1[ 2] - pix2[ 2]];
270 s += sq[pix1[ 3] - pix2[ 3]];
271 s += sq[pix1[ 4] - pix2[ 4]];
272 s += sq[pix1[ 5] - pix2[ 5]];
273 s += sq[pix1[ 6] - pix2[ 6]];
274 s += sq[pix1[ 7] - pix2[ 7]];
275 s += sq[pix1[ 8] - pix2[ 8]];
276 s += sq[pix1[ 9] - pix2[ 9]];
277 s += sq[pix1[10] - pix2[10]];
278 s += sq[pix1[11] - pix2[11]];
279 s += sq[pix1[12] - pix2[12]];
280 s += sq[pix1[13] - pix2[13]];
281 s += sq[pix1[14] - pix2[14]];
282 s += sq[pix1[15] - pix2[15]];
2a006cd3 283
6b026927
FH
284 pix1 += line_size;
285 pix2 += line_size;
9c76bd48
BF
286 }
287 return s;
288}
289
26efc54e
MN
290
/**
 * Wavelet-domain block comparison: take the difference of two blocks, scale
 * it by 16, run ff_spatial_dwt() over it in place and return a quarter of
 * the sum of the absolute values of the result.
 *
 * An experimental per-subband weighting of the coefficients was compiled
 * out (#if 0) in the previous revision of this function and is not
 * reproduced here.
 *
 * @param v         unused
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between consecutive rows of both blocks
 * @param w         block width; the wrappers in this file pass 8 or 16
 * @param h         block height; must not exceed 16
 * @param type      passed through unchanged to ff_spatial_dwt()
 * @return sum of absolute transformed differences, shifted right by 2
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    /* 3 decomposition levels for 8-wide blocks, 4 otherwise */
    const int dec_count = (w == 8) ? 3 : 4;
    int tmp[16*16];
    int s = 0;
    int i, j;

    /* tmp is a fixed 16x16 scratch area with a row stride of 16 */
    assert(w <= 16 && h <= 16);

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            /* multiply rather than shift: the difference may be negative and
             * left-shifting a negative value is undefined */
            tmp[16*i + j] = (pix1[j] - pix2[j]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 16, type, dec_count);

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++)
            s += ABS(tmp[16*i + j]);
    }
    assert(s >= 0);

    return s >> 2;
}
370
/**
 * w_c() for 8-pixel-wide blocks with transform type 1
 * (presumably the 5/3 wavelet, going by the name -- confirm).
 */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}
374
/**
 * w_c() for 8-pixel-wide blocks with transform type 0
 * (presumably the 9/7 wavelet, going by the name -- confirm).
 */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}
378
/**
 * w_c() for 16-pixel-wide blocks with transform type 1
 * (presumably the 5/3 wavelet, going by the name -- confirm).
 */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}
382
/**
 * w_c() for 16-pixel-wide blocks with transform type 0
 * (presumably the 9/7 wavelet, going by the name -- confirm).
 */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}
386
0c1a9eda 387static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 388{
de6d9b64
FB
389 int i;
390
391 /* read the pixels */
de6d9b64 392 for(i=0;i<8;i++) {
c13e1abd
FH
393 block[0] = pixels[0];
394 block[1] = pixels[1];
395 block[2] = pixels[2];
396 block[3] = pixels[3];
397 block[4] = pixels[4];
398 block[5] = pixels[5];
399 block[6] = pixels[6];
400 block[7] = pixels[7];
401 pixels += line_size;
402 block += 8;
de6d9b64
FB
403 }
404}
405
0c1a9eda
ZK
406static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
407 const uint8_t *s2, int stride){
9dbcbd92
MN
408 int i;
409
410 /* read the pixels */
9dbcbd92 411 for(i=0;i<8;i++) {
c13e1abd
FH
412 block[0] = s1[0] - s2[0];
413 block[1] = s1[1] - s2[1];
414 block[2] = s1[2] - s2[2];
415 block[3] = s1[3] - s2[3];
416 block[4] = s1[4] - s2[4];
417 block[5] = s1[5] - s2[5];
418 block[6] = s1[6] - s2[6];
419 block[7] = s1[7] - s2[7];
9dbcbd92
MN
420 s1 += stride;
421 s2 += stride;
c13e1abd 422 block += 8;
9dbcbd92
MN
423 }
424}
425
426
0c1a9eda 427static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
eb4b3dd3 428 int line_size)
de6d9b64 429{
de6d9b64 430 int i;
0c1a9eda 431 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
432
433 /* read the pixels */
de6d9b64 434 for(i=0;i<8;i++) {
c13e1abd
FH
435 pixels[0] = cm[block[0]];
436 pixels[1] = cm[block[1]];
437 pixels[2] = cm[block[2]];
438 pixels[3] = cm[block[3]];
439 pixels[4] = cm[block[4]];
440 pixels[5] = cm[block[5]];
441 pixels[6] = cm[block[6]];
442 pixels[7] = cm[block[7]];
443
444 pixels += line_size;
445 block += 8;
de6d9b64
FB
446 }
447}
448
178fcca8
MN
449static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
450 int line_size)
451{
452 int i;
453 uint8_t *cm = cropTbl + MAX_NEG_CROP;
454
455 /* read the pixels */
456 for(i=0;i<4;i++) {
457 pixels[0] = cm[block[0]];
458 pixels[1] = cm[block[1]];
459 pixels[2] = cm[block[2]];
460 pixels[3] = cm[block[3]];
461
462 pixels += line_size;
463 block += 8;
464 }
465}
466
f9ed9d85
MM
467static void put_signed_pixels_clamped_c(const DCTELEM *block,
468 uint8_t *restrict pixels,
469 int line_size)
470{
471 int i, j;
472
473 for (i = 0; i < 8; i++) {
474 for (j = 0; j < 8; j++) {
475 if (*block < -128)
476 *pixels = 0;
477 else if (*block > 127)
478 *pixels = 255;
479 else
480 *pixels = (uint8_t)(*block + 128);
481 block++;
482 pixels++;
483 }
484 pixels += (line_size - 8);
485 }
486}
487
0c1a9eda 488static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 489 int line_size)
de6d9b64 490{
de6d9b64 491 int i;
0c1a9eda 492 uint8_t *cm = cropTbl + MAX_NEG_CROP;
de6d9b64
FB
493
494 /* read the pixels */
de6d9b64 495 for(i=0;i<8;i++) {
c13e1abd
FH
496 pixels[0] = cm[pixels[0] + block[0]];
497 pixels[1] = cm[pixels[1] + block[1]];
498 pixels[2] = cm[pixels[2] + block[2]];
499 pixels[3] = cm[pixels[3] + block[3]];
500 pixels[4] = cm[pixels[4] + block[4]];
501 pixels[5] = cm[pixels[5] + block[5]];
502 pixels[6] = cm[pixels[6] + block[6]];
503 pixels[7] = cm[pixels[7] + block[7]];
504 pixels += line_size;
505 block += 8;
de6d9b64
FB
506 }
507}
178fcca8
MN
508
509static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
510 int line_size)
511{
512 int i;
513 uint8_t *cm = cropTbl + MAX_NEG_CROP;
514
515 /* read the pixels */
516 for(i=0;i<4;i++) {
517 pixels[0] = cm[pixels[0] + block[0]];
518 pixels[1] = cm[pixels[1] + block[1]];
519 pixels[2] = cm[pixels[2] + block[2]];
520 pixels[3] = cm[pixels[3] + block[3]];
521 pixels += line_size;
522 block += 8;
523 }
524}
59fe111e
MN
525#if 0
526
527#define PIXOP2(OPNAME, OP) \
b3184779 528static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
529{\
530 int i;\
531 for(i=0; i<h; i++){\
532 OP(*((uint64_t*)block), LD64(pixels));\
533 pixels+=line_size;\
534 block +=line_size;\
535 }\
536}\
537\
45553457 538static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
539{\
540 int i;\
541 for(i=0; i<h; i++){\
542 const uint64_t a= LD64(pixels );\
543 const uint64_t b= LD64(pixels+1);\
544 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
545 pixels+=line_size;\
546 block +=line_size;\
547 }\
548}\
549\
45553457 550static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
551{\
552 int i;\
553 for(i=0; i<h; i++){\
554 const uint64_t a= LD64(pixels );\
555 const uint64_t b= LD64(pixels+1);\
556 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
557 pixels+=line_size;\
558 block +=line_size;\
559 }\
560}\
561\
45553457 562static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
563{\
564 int i;\
565 for(i=0; i<h; i++){\
566 const uint64_t a= LD64(pixels );\
567 const uint64_t b= LD64(pixels+line_size);\
568 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
569 pixels+=line_size;\
570 block +=line_size;\
571 }\
572}\
573\
45553457 574static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
575{\
576 int i;\
577 for(i=0; i<h; i++){\
578 const uint64_t a= LD64(pixels );\
579 const uint64_t b= LD64(pixels+line_size);\
580 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
581 pixels+=line_size;\
582 block +=line_size;\
583 }\
584}\
585\
45553457 586static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
587{\
588 int i;\
589 const uint64_t a= LD64(pixels );\
590 const uint64_t b= LD64(pixels+1);\
591 uint64_t l0= (a&0x0303030303030303ULL)\
592 + (b&0x0303030303030303ULL)\
593 + 0x0202020202020202ULL;\
594 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
595 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
596 uint64_t l1,h1;\
597\
598 pixels+=line_size;\
599 for(i=0; i<h; i+=2){\
600 uint64_t a= LD64(pixels );\
601 uint64_t b= LD64(pixels+1);\
602 l1= (a&0x0303030303030303ULL)\
603 + (b&0x0303030303030303ULL);\
604 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
605 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
606 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
607 pixels+=line_size;\
608 block +=line_size;\
609 a= LD64(pixels );\
610 b= LD64(pixels+1);\
611 l0= (a&0x0303030303030303ULL)\
612 + (b&0x0303030303030303ULL)\
613 + 0x0202020202020202ULL;\
614 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
615 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
616 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
617 pixels+=line_size;\
618 block +=line_size;\
619 }\
620}\
621\
45553457 622static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
623{\
624 int i;\
625 const uint64_t a= LD64(pixels );\
626 const uint64_t b= LD64(pixels+1);\
627 uint64_t l0= (a&0x0303030303030303ULL)\
628 + (b&0x0303030303030303ULL)\
629 + 0x0101010101010101ULL;\
630 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
631 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
632 uint64_t l1,h1;\
633\
634 pixels+=line_size;\
635 for(i=0; i<h; i+=2){\
636 uint64_t a= LD64(pixels );\
637 uint64_t b= LD64(pixels+1);\
638 l1= (a&0x0303030303030303ULL)\
639 + (b&0x0303030303030303ULL);\
640 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
641 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
642 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
643 pixels+=line_size;\
644 block +=line_size;\
645 a= LD64(pixels );\
646 b= LD64(pixels+1);\
647 l0= (a&0x0303030303030303ULL)\
648 + (b&0x0303030303030303ULL)\
649 + 0x0101010101010101ULL;\
650 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
651 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
652 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
653 pixels+=line_size;\
654 block +=line_size;\
655 }\
656}\
657\
45553457
ZK
658CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
659CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
660CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
661CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
662CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
663CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
664CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
665
666#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
667#else // 64 bit variant
668
669#define PIXOP2(OPNAME, OP) \
669ac79c
MN
670static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
671 int i;\
672 for(i=0; i<h; i++){\
673 OP(*((uint16_t*)(block )), LD16(pixels ));\
674 pixels+=line_size;\
675 block +=line_size;\
676 }\
677}\
0da71265
MN
678static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
679 int i;\
680 for(i=0; i<h; i++){\
681 OP(*((uint32_t*)(block )), LD32(pixels ));\
682 pixels+=line_size;\
683 block +=line_size;\
684 }\
685}\
45553457 686static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
59fe111e
MN
687 int i;\
688 for(i=0; i<h; i++){\
689 OP(*((uint32_t*)(block )), LD32(pixels ));\
690 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
691 pixels+=line_size;\
692 block +=line_size;\
693 }\
694}\
45553457
ZK
695static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
696 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
b3184779 697}\
59fe111e 698\
b3184779
MN
699static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
700 int src_stride1, int src_stride2, int h){\
59fe111e
MN
701 int i;\
702 for(i=0; i<h; i++){\
b3184779
MN
703 uint32_t a,b;\
704 a= LD32(&src1[i*src_stride1 ]);\
705 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 706 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
b3184779
MN
707 a= LD32(&src1[i*src_stride1+4]);\
708 b= LD32(&src2[i*src_stride2+4]);\
d8085ea7 709 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
59fe111e
MN
710 }\
711}\
712\
b3184779
MN
713static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
714 int src_stride1, int src_stride2, int h){\
59fe111e
MN
715 int i;\
716 for(i=0; i<h; i++){\
b3184779
MN
717 uint32_t a,b;\
718 a= LD32(&src1[i*src_stride1 ]);\
719 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 720 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
b3184779
MN
721 a= LD32(&src1[i*src_stride1+4]);\
722 b= LD32(&src2[i*src_stride2+4]);\
d8085ea7 723 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
59fe111e
MN
724 }\
725}\
726\
0da71265
MN
727static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
728 int src_stride1, int src_stride2, int h){\
729 int i;\
730 for(i=0; i<h; i++){\
731 uint32_t a,b;\
732 a= LD32(&src1[i*src_stride1 ]);\
733 b= LD32(&src2[i*src_stride2 ]);\
d8085ea7 734 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
0da71265
MN
735 }\
736}\
737\
669ac79c
MN
738static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
739 int src_stride1, int src_stride2, int h){\
740 int i;\
741 for(i=0; i<h; i++){\
742 uint32_t a,b;\
743 a= LD16(&src1[i*src_stride1 ]);\
744 b= LD16(&src2[i*src_stride2 ]);\
745 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
746 }\
747}\
748\
b3184779
MN
749static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
750 int src_stride1, int src_stride2, int h){\
751 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
752 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
753}\
754\
755static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
756 int src_stride1, int src_stride2, int h){\
757 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
758 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
759}\
760\
45553457 761static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
762 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
763}\
764\
45553457 765static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
766 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
767}\
768\
45553457 769static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
770 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
771}\
772\
45553457 773static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
774 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
775}\
776\
777static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
778 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
779 int i;\
780 for(i=0; i<h; i++){\
b3184779
MN
781 uint32_t a, b, c, d, l0, l1, h0, h1;\
782 a= LD32(&src1[i*src_stride1]);\
783 b= LD32(&src2[i*src_stride2]);\
784 c= LD32(&src3[i*src_stride3]);\
785 d= LD32(&src4[i*src_stride4]);\
786 l0= (a&0x03030303UL)\
787 + (b&0x03030303UL)\
788 + 0x02020202UL;\
789 h0= ((a&0xFCFCFCFCUL)>>2)\
790 + ((b&0xFCFCFCFCUL)>>2);\
791 l1= (c&0x03030303UL)\
792 + (d&0x03030303UL);\
793 h1= ((c&0xFCFCFCFCUL)>>2)\
794 + ((d&0xFCFCFCFCUL)>>2);\
795 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
796 a= LD32(&src1[i*src_stride1+4]);\
797 b= LD32(&src2[i*src_stride2+4]);\
798 c= LD32(&src3[i*src_stride3+4]);\
799 d= LD32(&src4[i*src_stride4+4]);\
800 l0= (a&0x03030303UL)\
801 + (b&0x03030303UL)\
802 + 0x02020202UL;\
803 h0= ((a&0xFCFCFCFCUL)>>2)\
804 + ((b&0xFCFCFCFCUL)>>2);\
805 l1= (c&0x03030303UL)\
806 + (d&0x03030303UL);\
807 h1= ((c&0xFCFCFCFCUL)>>2)\
808 + ((d&0xFCFCFCFCUL)>>2);\
809 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
810 }\
811}\
669ac79c
MN
812\
813static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
814 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
815}\
816\
817static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
818 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
819}\
820\
821static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
822 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
823}\
824\
825static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
826 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
827}\
828\
b3184779
MN
829static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
830 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
831 int i;\
832 for(i=0; i<h; i++){\
b3184779
MN
833 uint32_t a, b, c, d, l0, l1, h0, h1;\
834 a= LD32(&src1[i*src_stride1]);\
835 b= LD32(&src2[i*src_stride2]);\
836 c= LD32(&src3[i*src_stride3]);\
837 d= LD32(&src4[i*src_stride4]);\
838 l0= (a&0x03030303UL)\
839 + (b&0x03030303UL)\
840 + 0x01010101UL;\
841 h0= ((a&0xFCFCFCFCUL)>>2)\
842 + ((b&0xFCFCFCFCUL)>>2);\
843 l1= (c&0x03030303UL)\
844 + (d&0x03030303UL);\
845 h1= ((c&0xFCFCFCFCUL)>>2)\
846 + ((d&0xFCFCFCFCUL)>>2);\
847 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
848 a= LD32(&src1[i*src_stride1+4]);\
849 b= LD32(&src2[i*src_stride2+4]);\
850 c= LD32(&src3[i*src_stride3+4]);\
851 d= LD32(&src4[i*src_stride4+4]);\
852 l0= (a&0x03030303UL)\
853 + (b&0x03030303UL)\
854 + 0x01010101UL;\
855 h0= ((a&0xFCFCFCFCUL)>>2)\
856 + ((b&0xFCFCFCFCUL)>>2);\
857 l1= (c&0x03030303UL)\
858 + (d&0x03030303UL);\
859 h1= ((c&0xFCFCFCFCUL)>>2)\
860 + ((d&0xFCFCFCFCUL)>>2);\
861 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
862 }\
863}\
b3184779
MN
864static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
865 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
866 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
867 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
868}\
869static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
870 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
871 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
872 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
873}\
59fe111e 874\
669ac79c
MN
875static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
876{\
877 int i, a0, b0, a1, b1;\
878 a0= pixels[0];\
879 b0= pixels[1] + 2;\
880 a0 += b0;\
881 b0 += pixels[2];\
882\
883 pixels+=line_size;\
884 for(i=0; i<h; i+=2){\
885 a1= pixels[0];\
886 b1= pixels[1];\
887 a1 += b1;\
888 b1 += pixels[2];\
889\
890 block[0]= (a1+a0)>>2; /* FIXME non put */\
891 block[1]= (b1+b0)>>2;\
892\
893 pixels+=line_size;\
894 block +=line_size;\
895\
896 a0= pixels[0];\
897 b0= pixels[1] + 2;\
898 a0 += b0;\
899 b0 += pixels[2];\
900\
901 block[0]= (a1+a0)>>2;\
902 block[1]= (b1+b0)>>2;\
903 pixels+=line_size;\
904 block +=line_size;\
905 }\
906}\
907\
908static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
909{\
910 int i;\
911 const uint32_t a= LD32(pixels );\
912 const uint32_t b= LD32(pixels+1);\
913 uint32_t l0= (a&0x03030303UL)\
914 + (b&0x03030303UL)\
915 + 0x02020202UL;\
916 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
917 + ((b&0xFCFCFCFCUL)>>2);\
918 uint32_t l1,h1;\
919\
920 pixels+=line_size;\
921 for(i=0; i<h; i+=2){\
922 uint32_t a= LD32(pixels );\
923 uint32_t b= LD32(pixels+1);\
924 l1= (a&0x03030303UL)\
925 + (b&0x03030303UL);\
926 h1= ((a&0xFCFCFCFCUL)>>2)\
927 + ((b&0xFCFCFCFCUL)>>2);\
928 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
929 pixels+=line_size;\
930 block +=line_size;\
931 a= LD32(pixels );\
932 b= LD32(pixels+1);\
933 l0= (a&0x03030303UL)\
934 + (b&0x03030303UL)\
935 + 0x02020202UL;\
936 h0= ((a&0xFCFCFCFCUL)>>2)\
937 + ((b&0xFCFCFCFCUL)>>2);\
938 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
939 pixels+=line_size;\
940 block +=line_size;\
941 }\
942}\
943\
45553457 944static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
945{\
946 int j;\
947 for(j=0; j<2; j++){\
948 int i;\
949 const uint32_t a= LD32(pixels );\
950 const uint32_t b= LD32(pixels+1);\
951 uint32_t l0= (a&0x03030303UL)\
952 + (b&0x03030303UL)\
953 + 0x02020202UL;\
954 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
955 + ((b&0xFCFCFCFCUL)>>2);\
956 uint32_t l1,h1;\
957\
958 pixels+=line_size;\
959 for(i=0; i<h; i+=2){\
960 uint32_t a= LD32(pixels );\
961 uint32_t b= LD32(pixels+1);\
962 l1= (a&0x03030303UL)\
963 + (b&0x03030303UL);\
964 h1= ((a&0xFCFCFCFCUL)>>2)\
965 + ((b&0xFCFCFCFCUL)>>2);\
966 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
967 pixels+=line_size;\
968 block +=line_size;\
969 a= LD32(pixels );\
970 b= LD32(pixels+1);\
971 l0= (a&0x03030303UL)\
972 + (b&0x03030303UL)\
973 + 0x02020202UL;\
974 h0= ((a&0xFCFCFCFCUL)>>2)\
975 + ((b&0xFCFCFCFCUL)>>2);\
976 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
977 pixels+=line_size;\
978 block +=line_size;\
979 }\
980 pixels+=4-line_size*(h+1);\
981 block +=4-line_size*h;\
982 }\
983}\
984\
45553457 985static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
986{\
987 int j;\
988 for(j=0; j<2; j++){\
989 int i;\
990 const uint32_t a= LD32(pixels );\
991 const uint32_t b= LD32(pixels+1);\
992 uint32_t l0= (a&0x03030303UL)\
993 + (b&0x03030303UL)\
994 + 0x01010101UL;\
995 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
996 + ((b&0xFCFCFCFCUL)>>2);\
997 uint32_t l1,h1;\
998\
999 pixels+=line_size;\
1000 for(i=0; i<h; i+=2){\
1001 uint32_t a= LD32(pixels );\
1002 uint32_t b= LD32(pixels+1);\
1003 l1= (a&0x03030303UL)\
1004 + (b&0x03030303UL);\
1005 h1= ((a&0xFCFCFCFCUL)>>2)\
1006 + ((b&0xFCFCFCFCUL)>>2);\
1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1008 pixels+=line_size;\
1009 block +=line_size;\
1010 a= LD32(pixels );\
1011 b= LD32(pixels+1);\
1012 l0= (a&0x03030303UL)\
1013 + (b&0x03030303UL)\
1014 + 0x01010101UL;\
1015 h0= ((a&0xFCFCFCFCUL)>>2)\
1016 + ((b&0xFCFCFCFCUL)>>2);\
1017 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1018 pixels+=line_size;\
1019 block +=line_size;\
1020 }\
1021 pixels+=4-line_size*(h+1);\
1022 block +=4-line_size*h;\
1023 }\
1024}\
1025\
45553457
ZK
1026CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
1027CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1028CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1029CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1030CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
1031CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1032CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1033CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
b3184779 1034
d8085ea7 1035#define op_avg(a, b) a = rnd_avg32(a, b)
59fe111e 1036#endif
59fe111e
MN
1037#define op_put(a, b) a = b
1038
1039PIXOP2(avg, op_avg)
1040PIXOP2(put, op_put)
1041#undef op_avg
1042#undef op_put
1043
de6d9b64
FB
/*
 * Rounded integer averages of two and four values.
 * Every argument is parenthesized so that expression arguments
 * (e.g. `a|b`, `x<<1`, `c ? p : q`) are evaluated as a unit before being
 * summed, instead of being torn apart by operator precedence.
 * Note: each argument is still evaluated exactly once per use, but these
 * remain macros, so avoid arguments with side effects.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
1046
c0a0170c
MN
/**
 * Single-stride front end for put_no_rnd_pixels16_l2().
 * The one stride argument is forwarded for all three stride parameters of
 * the underlying helper; dst, a, b and h are passed through unchanged.
 */
1047static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1048 put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
1049}
1050
/**
 * Single-stride front end for put_no_rnd_pixels8_l2().
 * The one stride argument is forwarded for all three stride parameters of
 * the underlying helper; dst, a, b and h are passed through unchanged.
 */
1051static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1052 put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
1053}
073b013d 1054
/**
 * Bilinear interpolation of an 8-pixel-wide block with a fixed 1/16-unit
 * fractional offset.
 *
 * @param dst     destination rows, 8 pixels written per row
 * @param src     source; src[0..8] and src[stride..stride+8] are read per row
 * @param stride  distance between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight numerator (weights are x16 and 16-x16)
 * @param y16     vertical weight numerator (weights are y16 and 16-y16)
 * @param rounder value added before the final >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* The four corner weights always sum to 16*16 = 256, hence the >>8. */
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = (     x16) * (16 - y16);
    const int w_bl = (16 - x16) * (     y16);
    const int w_br = (     x16) * (     y16);
    int row;

    for (row = 0; row < h; row++) {
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1]
                      + w_bl * src[stride + col] + w_br * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1077
/**
 * Affine-warped bilinear sampling of an 8-pixel-wide, h-row block.
 *
 * For every output pixel a source position (vx, vy) is tracked; it advances
 * by (dxx, dyx) per column and the row start advances by (dxy, dyy) per row.
 * The position is shifted right by 16, its low `shift` bits become the
 * fractional weights and the remaining bits the integer sample coordinate.
 * Coordinates outside [0, width-1) / [0, height-1) are clamped with clip()
 * and interpolation is dropped along the clamped axis.
 *
 * @param dst     destination, written at dst[y*stride + x] for x in 0..7
 * @param src     source plane
 * @param stride  row distance for both dst and src
 * @param r       rounding term added before the final >>(2*shift)
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int one   = 1 << shift;   /* weight corresponding to 1.0 */
    const int max_x = width  - 1;   /* last column that still has a right neighbour */
    const int max_y = height - 1;   /* last row that still has a lower neighbour */
    int row;

    for (row = 0; row < h; row++) {
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) { //XXX FIXME optimize
            int sx = vx >> 16;
            int sy = vy >> 16;
            const int fx = sx & (one - 1);
            const int fy = sy & (one - 1);
            int in_x, in_y, pos;

            sx >>= shift;
            sy >>= shift;

            /* unsigned compare rejects negative coordinates in the same test */
            in_x = (unsigned)sx < (unsigned)max_x;
            in_y = (unsigned)sy < (unsigned)max_y;

            if (in_x && in_y) {
                /* full bilinear blend of the 2x2 neighbourhood */
                pos = sx + sy * stride;
                dst[row * stride + col] =
                    ( ( src[pos             ] * (one - fx)
                      + src[pos          + 1] *        fx ) * (one - fy)
                    + ( src[pos + stride    ] * (one - fx)
                      + src[pos + stride + 1] *        fx ) *        fy
                    + r) >> (shift * 2);
            } else if (in_x) {
                /* row clamped: horizontal blend only */
                pos = sx + clip(sy, 0, max_y) * stride;
                dst[row * stride + col] =
                    ( ( src[pos    ] * (one - fx)
                      + src[pos + 1] *        fx ) * one
                    + r) >> (shift * 2);
            } else if (in_y) {
                /* column clamped: vertical blend only */
                pos = clip(sx, 0, max_x) + sy * stride;
                dst[row * stride + col] =
                    ( ( src[pos         ] * (one - fy)
                      + src[pos + stride] *        fy ) * one
                    + r) >> (shift * 2);
            } else {
                /* both clamped: plain copy of the nearest edge sample */
                pos = clip(sx, 0, max_x) + clip(sy, 0, max_y) * stride;
                dst[row * stride + col] = src[pos];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
1135
/**
 * mc00 "put" case: dispatch on block width to the matching put_pixelsN_c()
 * helper. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2) {
        put_pixels2_c (dst, src, stride, height);
    } else if (width == 4) {
        put_pixels4_c (dst, src, stride, height);
    } else if (width == 8) {
        put_pixels8_c (dst, src, stride, height);
    } else if (width == 16) {
        put_pixels16_c(dst, src, stride, height);
    }
}
1144
/**
 * mc10 "put" case: dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11 for a
 * width x height block. 683/2048 is approximately 1/3, so each output is a
 * 2:1 weighted mean of a pixel and its right neighbour.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1155
/**
 * mc20 "put" case: dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11 for a
 * width x height block. 683/2048 is approximately 1/3, so each output is a
 * 1:2 weighted mean of a pixel and its right neighbour.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1166
/**
 * mc01 "put" case: dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11 for a
 * width x height block. 683/2048 is approximately 1/3, so each output is a
 * 2:1 weighted mean of a pixel and the pixel one row below.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1177
/**
 * mc11 "put" case: 2x2 weighted blend with weights 4,3 / 3,2 (sum 12):
 * dst[j] = (2731*(4*a + 3*b + 3*c + 2*d + 6)) >> 15, where a,b are src[j],
 * src[j+1] and c,d the same columns one row below. 2731/32768 is
 * approximately 1/12.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = 4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1188
/**
 * mc12 "put" case: 2x2 weighted blend with weights 3,2 / 4,3 (sum 12):
 * dst[j] = (2731*(3*a + 2*b + 4*c + 3*d + 6)) >> 15, where a,b are src[j],
 * src[j+1] and c,d the same columns one row below. 2731/32768 is
 * approximately 1/12.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1199
/**
 * mc02 "put" case: dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11 for a
 * width x height block. 683/2048 is approximately 1/3, so each output is a
 * 1:2 weighted mean of a pixel and the pixel one row below.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1210
/**
 * mc21 "put" case: 2x2 weighted blend with weights 3,4 / 2,3 (sum 12):
 * dst[j] = (2731*(3*a + 4*b + 2*c + 3*d + 6)) >> 15, where a,b are src[j],
 * src[j+1] and c,d the same columns one row below. 2731/32768 is
 * approximately 1/12.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1221
/**
 * mc22 "put" case: 2x2 weighted blend with weights 2,3 / 3,4 (sum 12):
 * dst[j] = (2731*(2*a + 3*b + 3*c + 4*d + 6)) >> 15, where a,b are src[j],
 * src[j+1] and c,d the same columns one row below. 2731/32768 is
 * approximately 1/12.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
da3b9756
MM
1232
/**
 * mc00 "avg" case: dispatch on block width to the matching avg_pixelsN_c()
 * helper. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2) {
        avg_pixels2_c (dst, src, stride, height);
    } else if (width == 4) {
        avg_pixels4_c (dst, src, stride, height);
    } else if (width == 8) {
        avg_pixels8_c (dst, src, stride, height);
    } else if (width == 16) {
        avg_pixels16_c(dst, src, stride, height);
    }
}
1241
/**
 * mc10 "avg" case: computes p = (683*(2*src[j] + src[j+1] + 1)) >> 11 and
 * stores the rounded-up mean of p and the existing dst[j].
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1252
/**
 * mc20 "avg" case: computes p = (683*(src[j] + 2*src[j+1] + 1)) >> 11 and
 * stores the rounded-up mean of p and the existing dst[j].
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1263
/**
 * mc01 "avg" case: computes p = (683*(2*src[j] + src[j+stride] + 1)) >> 11
 * and stores the rounded-up mean of p and the existing dst[j].
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1274
/**
 * mc11 "avg" case: computes the 2x2 blend
 * p = (2731*(4*a + 3*b + 3*c + 2*d + 6)) >> 15 (a,b = src[j], src[j+1];
 * c,d = same columns one row below) and stores the rounded-up mean of p and
 * the existing dst[j].
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (2731*(4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1285
/**
 * mc12 "avg" case: computes the 2x2 blend
 * p = (2731*(3*a + 2*b + 4*c + 3*d + 6)) >> 15 (a,b = src[j], src[j+1];
 * c,d = same columns one row below) and stores the rounded-up mean of p and
 * the existing dst[j].
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (2731*(3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1296
/**
 * mc02 "avg" case: computes p = (683*(src[j] + 2*src[j+stride] + 1)) >> 11
 * and stores the rounded-up mean of p and the existing dst[j].
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1307
/**
 * mc21 "avg" case: computes the 2x2 blend
 * p = (2731*(3*a + 4*b + 2*c + 3*d + 6)) >> 15 (a,b = src[j], src[j+1];
 * c,d = same columns one row below) and stores the rounded-up mean of p and
 * the existing dst[j].
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (2731*(3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1318
/**
 * mc22 "avg" case: computes the 2x2 blend
 * p = (2731*(2*a + 3*b + 3*c + 4*d + 6)) >> 15 (a,b = src[j], src[j+1];
 * c,d = same columns one row below) and stores the rounded-up mean of p and
 * the existing dst[j].
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int pred = (2731*(2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
669ac79c
MN
#if 0
/*
 * Disabled generator for fixed-width wrappers around the generic
 * put_tpel_pixels_mcXY_c() functions: TPEL_WIDTH(8) would define
 * put_tpel_pixels8_mcXY_c(dst, src, stride, height) for every XY case, each
 * forwarding to the generic function with width fixed to the macro argument.
 * The wrapper bodies are plain calls (a leading `void` would turn them into
 * invalid declarations), so the macro compiles if this block is enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1350
0da71265
MN
/*
 * Chroma bilinear motion compensation in 1/8-sample units.
 *
 * H264_CHROMA_MC_W(OPNAME, OP, W) defines OPNAME##h264_chroma_mc<W>_c(), which
 * for each of h rows writes W pixels. Each pixel is the weighted sum of the
 * 2x2 source neighbourhood with weights A..D derived from the fractional
 * offsets x and y (both asserted to be in 0..7); the weights sum to 64. The
 * raw sum is handed to OP, which scales it back and stores it.
 *
 * H264_CHROMA_MC(OPNAME, OP) instantiates the 2-, 4- and 8-wide variants.
 */
#define H264_CHROMA_MC_W(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(row=0; row<h; row++){\
        for(col=0; col<W; col++){\
            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
        }\
        dst+= stride;\
        src+= stride;\
    }\
}

#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_W(OPNAME, OP, 2)\
\
H264_CHROMA_MC_W(OPNAME, OP, 4)\
\
H264_CHROMA_MC_W(OPNAME, OP, 8)

/* b is the 64-scaled weighted sum: round and shift back to pixel range;
 * op_avg additionally takes the rounded-up mean with the existing pixel. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1421
/**
 * Copy a 4-byte-wide block of h rows from src to dst.
 * Each row is moved as one 32-bit word through LD32/ST32 (helpers defined
 * elsewhere; presumably unaligned-safe 32-bit load/store -- confirm).
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        ST32(dst, LD32(src));
        dst += dstStride;
        src += srcStride;
    }
}
1432
/**
 * Copy an 8-byte-wide block of h rows from src to dst.
 * Each row is moved as two 32-bit words through LD32/ST32 (helpers defined
 * elsewhere; presumably unaligned-safe 32-bit load/store -- confirm).
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst += dstStride;
        src += srcStride;
    }
}
1444
/**
 * Copy a 16-byte-wide block of h rows from src to dst.
 * Each row is moved as four 32-bit words through LD32/ST32 (helpers defined
 * elsewhere; presumably unaligned-safe 32-bit load/store -- confirm).
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst += dstStride;
        src += srcStride;
    }
}
073b013d 1458
/**
 * Copy a 17-byte-wide block of h rows from src to dst.
 * The first 16 bytes of each row go through LD32/ST32 as four 32-bit words
 * (helpers defined elsewhere; presumably unaligned-safe -- confirm); the
 * 17th byte is copied individually.
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst[16] = src[16];
        dst += dstStride;
        src += srcStride;
    }
}
1473
/**
 * Copy a 9-byte-wide block of h rows from src to dst.
 * The first 8 bytes of each row go through LD32/ST32 as two 32-bit words
 * (helpers defined elsewhere; presumably unaligned-safe -- confirm); the
 * 9th byte is copied individually.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst[8] = src[8];
        dst += dstStride;
        src += srcStride;
    }
}
1486
826f429a 1487
b3184779 1488#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda
ZK
1489static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1490 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1491 int i;\
1492 for(i=0; i<h; i++)\
1493 {\
1494 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1495 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1496 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1497 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1498 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1499 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1500 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1501 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1502 dst+=dstStride;\
1503 src+=srcStride;\
1504 }\
44eb4951
MN
1505}\
1506\
0c1a9eda 1507static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 1508 const int w=8;\
0c1a9eda 1509 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779
MN
1510 int i;\
1511 for(i=0; i<w; i++)\
1512 {\
1513 const int src0= src[0*srcStride];\
1514 const int src1= src[1*srcStride];\
1515 const int src2= src[2*srcStride];\
1516 const int src3= src[3*srcStride];\
1517 const int src4= src[4*srcStride];\
1518 const int src5= src[5*srcStride];\
1519 const int src6= src[6*srcStride];\
1520 const int src7= src[7*srcStride];\
1521 const int src8= src[8*srcStride];\
1522 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1523 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1524 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1525 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1526 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1527 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1528 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1529 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1530 dst++;\
1531 src++;\
1532 }\
1533}\
1534\
0c1a9eda
ZK
1535static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1536 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1537 int i;\
826f429a 1538 \
b3184779
MN
1539 for(i=0; i<h; i++)\
1540 {\
1541 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1542 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1543 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1544 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1545 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1546 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1547 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1548 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1549 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1550 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1551 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1552 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1553 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1554 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1555 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1556 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1557 dst+=dstStride;\
1558 src+=srcStride;\
1559 }\
1560}\
1561\
0c1a9eda
ZK
1562static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1563 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
b3184779 1564 int i;\
826f429a 1565 const int w=16;\
b3184779
MN
1566 for(i=0; i<w; i++)\
1567 {\
1568 const int src0= src[0*srcStride];\
1569 const int src1= src[1*srcStride];\
1570 const int src2= src[2*srcStride];\
1571 const int src3= src[3*srcStride];\
1572 const int src4= src[4*srcStride];\
1573 const int src5= src[5*srcStride];\
1574 const int src6= src[6*srcStride];\
1575 const int src7= src[7*srcStride];\
1576 const int src8= src[8*srcStride];\
1577 const int src9= src[9*srcStride];\
1578 const int src10= src[10*srcStride];\
1579 const int src11= src[11*srcStride];\
1580 const int src12= src[12*srcStride];\
1581 const int src13= src[13*srcStride];\
1582 const int src14= src[14*srcStride];\
1583 const int src15= src[15*srcStride];\
1584 const int src16= src[16*srcStride];\
1585 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1586 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1587 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1588 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1589 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1590 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1591 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1592 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1593 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1594 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1595 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1596 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1597 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1598 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1599 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1600 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1601 dst++;\
1602 src++;\
1603 }\
1604}\
1605\
0c1a9eda 1606static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1607 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
1608}\
1609\
0c1a9eda
ZK
1610static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1611 uint8_t half[64];\
b3184779
MN
1612 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1613 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
1614}\
1615\
0c1a9eda 1616static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1617 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
1618}\
1619\
0c1a9eda
ZK
1620static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1621 uint8_t half[64];\
b3184779
MN
1622 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1623 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
1624}\
1625\
0c1a9eda
ZK
1626static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1627 uint8_t full[16*9];\
1628 uint8_t half[64];\
b3184779 1629 copy_block9(full, src, 16, stride, 9);\
db794953 1630 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1631 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
1632}\
1633\
0c1a9eda
ZK
1634static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1635 uint8_t full[16*9];\
b3184779 1636 copy_block9(full, src, 16, stride, 9);\
db794953 1637 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
1638}\
1639\
0c1a9eda
ZK
1640static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1641 uint8_t full[16*9];\
1642 uint8_t half[64];\
b3184779 1643 copy_block9(full, src, 16, stride, 9);\
db794953 1644 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1645 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 1646}\
0c1a9eda
ZK
1647void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1648 uint8_t full[16*9];\
1649 uint8_t halfH[72];\
1650 uint8_t halfV[64];\
1651 uint8_t halfHV[64];\
b3184779
MN
1652 copy_block9(full, src, 16, stride, 9);\
1653 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1654 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1655 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1656 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1657}\
0c1a9eda
ZK
1658static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1659 uint8_t full[16*9];\
1660 uint8_t halfH[72];\
1661 uint8_t halfHV[64];\
db794953
MN
1662 copy_block9(full, src, 16, stride, 9);\
1663 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1664 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1665 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1666 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1667}\
0c1a9eda
ZK
1668void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1669 uint8_t full[16*9];\
1670 uint8_t halfH[72];\
1671 uint8_t halfV[64];\
1672 uint8_t halfHV[64];\
b3184779
MN
1673 copy_block9(full, src, 16, stride, 9);\
1674 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1675 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1676 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1677 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1678}\
0c1a9eda
ZK
1679static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1680 uint8_t full[16*9];\
1681 uint8_t halfH[72];\
1682 uint8_t halfHV[64];\
db794953
MN
1683 copy_block9(full, src, 16, stride, 9);\
1684 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1685 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1686 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1687 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1688}\
0c1a9eda
ZK
1689void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1690 uint8_t full[16*9];\
1691 uint8_t halfH[72];\
1692 uint8_t halfV[64];\
1693 uint8_t halfHV[64];\
b3184779
MN
1694 copy_block9(full, src, 16, stride, 9);\
1695 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1696 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1697 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1698 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1699}\
0c1a9eda
ZK
1700static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1701 uint8_t full[16*9];\
1702 uint8_t halfH[72];\
1703 uint8_t halfHV[64];\
db794953
MN
1704 copy_block9(full, src, 16, stride, 9);\
1705 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1706 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1707 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1708 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1709}\
0c1a9eda
ZK
1710void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1711 uint8_t full[16*9];\
1712 uint8_t halfH[72];\
1713 uint8_t halfV[64];\
1714 uint8_t halfHV[64];\
b3184779
MN
1715 copy_block9(full, src, 16, stride, 9);\
1716 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1717 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1718 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1719 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1720}\
0c1a9eda
ZK
1721static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1722 uint8_t full[16*9];\
1723 uint8_t halfH[72];\
1724 uint8_t halfHV[64];\
db794953
MN
1725 copy_block9(full, src, 16, stride, 9);\
1726 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1727 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1728 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1729 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1730}\
0c1a9eda
ZK
1731static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1732 uint8_t halfH[72];\
1733 uint8_t halfHV[64];\
b3184779 1734 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1735 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1736 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1737}\
0c1a9eda
ZK
1738static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1739 uint8_t halfH[72];\
1740 uint8_t halfHV[64];\
b3184779 1741 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1742 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1743 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1744}\
0c1a9eda
ZK
1745void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1746 uint8_t full[16*9];\
1747 uint8_t halfH[72];\
1748 uint8_t halfV[64];\
1749 uint8_t halfHV[64];\
b3184779
MN
1750 copy_block9(full, src, 16, stride, 9);\
1751 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1752 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1753 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1754 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1755}\
0c1a9eda
ZK
1756static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1757 uint8_t full[16*9];\
1758 uint8_t halfH[72];\
db794953
MN
1759 copy_block9(full, src, 16, stride, 9);\
1760 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1761 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1762 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1763}\
0c1a9eda
ZK
1764void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1765 uint8_t full[16*9];\
1766 uint8_t halfH[72];\
1767 uint8_t halfV[64];\
1768 uint8_t halfHV[64];\
b3184779
MN
1769 copy_block9(full, src, 16, stride, 9);\
1770 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1771 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1772 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1773 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1774}\
0c1a9eda
ZK
1775static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1776 uint8_t full[16*9];\
1777 uint8_t halfH[72];\
db794953
MN
1778 copy_block9(full, src, 16, stride, 9);\
1779 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1780 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1781 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1782}\
0c1a9eda
ZK
1783static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1784 uint8_t halfH[72];\
b3184779 1785 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1786 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1787}\
0c1a9eda 1788static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1789 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
1790}\
1791\
0c1a9eda
ZK
1792static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1793 uint8_t half[256];\
b3184779
MN
1794 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1795 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1796}\
1797\
0c1a9eda 1798static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1799 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1800}\
b3184779 1801\
0c1a9eda
ZK
1802static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1803 uint8_t half[256];\
b3184779
MN
1804 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1805 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1806}\
1807\
0c1a9eda
ZK
1808static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1809 uint8_t full[24*17];\
1810 uint8_t half[256];\
b3184779 1811 copy_block17(full, src, 24, stride, 17);\
826f429a 1812 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1813 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1814}\
1815\
0c1a9eda
ZK
1816static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1817 uint8_t full[24*17];\
b3184779 1818 copy_block17(full, src, 24, stride, 17);\
826f429a 1819 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1820}\
1821\
0c1a9eda
ZK
1822static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1823 uint8_t full[24*17];\
1824 uint8_t half[256];\
b3184779 1825 copy_block17(full, src, 24, stride, 17);\
826f429a 1826 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1827 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1828}\
0c1a9eda
ZK
1829void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1830 uint8_t full[24*17];\
1831 uint8_t halfH[272];\
1832 uint8_t halfV[256];\
1833 uint8_t halfHV[256];\
b3184779
MN
1834 copy_block17(full, src, 24, stride, 17);\
1835 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1836 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1837 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1838 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1839}\
0c1a9eda
ZK
1840static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1841 uint8_t full[24*17];\
1842 uint8_t halfH[272];\
1843 uint8_t halfHV[256];\
db794953
MN
1844 copy_block17(full, src, 24, stride, 17);\
1845 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1846 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1847 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1848 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1849}\
0c1a9eda
ZK
1850void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1851 uint8_t full[24*17];\
1852 uint8_t halfH[272];\
1853 uint8_t halfV[256];\
1854 uint8_t halfHV[256];\
b3184779
MN
1855 copy_block17(full, src, 24, stride, 17);\
1856 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1857 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1858 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1859 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1860}\
0c1a9eda
ZK
1861static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1862 uint8_t full[24*17];\
1863 uint8_t halfH[272];\
1864 uint8_t halfHV[256];\
db794953
MN
1865 copy_block17(full, src, 24, stride, 17);\
1866 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1867 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1868 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1869 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1870}\
0c1a9eda
ZK
1871void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1872 uint8_t full[24*17];\
1873 uint8_t halfH[272];\
1874 uint8_t halfV[256];\
1875 uint8_t halfHV[256];\
b3184779
MN
1876 copy_block17(full, src, 24, stride, 17);\
1877 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1878 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1879 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1880 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1881}\
0c1a9eda
ZK
1882static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1883 uint8_t full[24*17];\
1884 uint8_t halfH[272];\
1885 uint8_t halfHV[256];\
db794953
MN
1886 copy_block17(full, src, 24, stride, 17);\
1887 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1888 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1889 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1890 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1891}\
0c1a9eda
ZK
1892void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1893 uint8_t full[24*17];\
1894 uint8_t halfH[272];\
1895 uint8_t halfV[256];\
1896 uint8_t halfHV[256];\
b3184779
MN
1897 copy_block17(full, src, 24, stride, 17);\
1898 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1899 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1900 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1901 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1902}\
0c1a9eda
ZK
1903static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1904 uint8_t full[24*17];\
1905 uint8_t halfH[272];\
1906 uint8_t halfHV[256];\
db794953
MN
1907 copy_block17(full, src, 24, stride, 17);\
1908 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1909 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1910 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1911 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1912}\
0c1a9eda
ZK
1913static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1914 uint8_t halfH[272];\
1915 uint8_t halfHV[256];\
b3184779 1916 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1917 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1918 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1919}\
0c1a9eda
ZK
1920static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1921 uint8_t halfH[272];\
1922 uint8_t halfHV[256];\
b3184779 1923 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1924 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1925 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1926}\
0c1a9eda
ZK
1927void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1928 uint8_t full[24*17];\
1929 uint8_t halfH[272];\
1930 uint8_t halfV[256];\
1931 uint8_t halfHV[256];\
b3184779
MN
1932 copy_block17(full, src, 24, stride, 17);\
1933 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1934 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1935 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1936 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1937}\
0c1a9eda
ZK
1938static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1939 uint8_t full[24*17];\
1940 uint8_t halfH[272];\
db794953
MN
1941 copy_block17(full, src, 24, stride, 17);\
1942 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1943 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1944 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1945}\
0c1a9eda
ZK
1946void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1947 uint8_t full[24*17];\
1948 uint8_t halfH[272];\
1949 uint8_t halfV[256];\
1950 uint8_t halfHV[256];\
b3184779
MN
1951 copy_block17(full, src, 24, stride, 17);\
1952 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1953 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1954 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1955 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1956}\
0c1a9eda
ZK
1957static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1958 uint8_t full[24*17];\
1959 uint8_t halfH[272];\
db794953
MN
1960 copy_block17(full, src, 24, stride, 17);\
1961 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1962 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1963 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1964}\
0c1a9eda
ZK
1965static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1966 uint8_t halfH[272];\
b3184779 1967 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1968 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1969}
44eb4951 1970
b3184779
MN
1971#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1972#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1973#define op_put(a, b) a = cm[((b) + 16)>>5]
1974#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1975
1976QPEL_MC(0, put_ , _ , op_put)
1977QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1978QPEL_MC(0, avg_ , _ , op_avg)
1979//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1980#undef op_avg
1981#undef op_avg_no_rnd
1982#undef op_put
1983#undef op_put_no_rnd
44eb4951 1984
0da71265
MN
1985#if 1
/**
 * Generates the H.264 lowpass filter functions for one output operation.
 *
 * Every filter applies the 6-tap kernel (1, -5, 20, 20, -5, 1) around the
 * sample pair at offsets 0 and +1, so each output needs 2 samples before and
 * 3 samples after its position along the filtered direction.
 *
 * @param OPNAME prefix pasted in front of every generated function name
 * @param OP     operation invoked as OP(destination lvalue, filter sum) by the
 *               single-pass (h and v) filters
 * @param OP2    same, used by the hv filters whose sum has been through the
 *               kernel twice (horizontally into tmp, then vertically)
 *
 * The generated functions declare a local `cm` (cropTbl + MAX_NEG_CROP) for
 * OP/OP2 to use. For the hv filters, `tmp` must hold (size+5) rows of
 * tmpStride int16_t values. The vertical passes read all their inputs for a
 * column before writing any output of that column.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
/* 4x4 block, kernel applied along each row. */\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col;\
    for(row=0; row<4; row++){\
        for(col=0; col<4; col++){\
            OP(dst[col], 20*(src[col]+src[col+1]) - 5*(src[col-1]+src[col+2]) + (src[col-2]+src[col+3]));\
        }\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* 4x4 block, kernel applied down each column. */\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col, n;\
    for(col=0; col<4; col++){\
        int tap[4+5]; /* tap[n] is the sample (n-2) rows below src */\
        for(n=0; n<4+5; n++){\
            tap[n]= src[(n-2)*srcStride];\
        }\
        for(n=0; n<4; n++){\
            OP(dst[n*dstStride], 20*(tap[n+2]+tap[n+3]) - 5*(tap[n+1]+tap[n+4]) + (tap[n]+tap[n+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
/* 4x4 block, rows filtered into tmp first, then tmp filtered down columns. */\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col, n;\
    /* start two rows early: the vertical pass needs 2 rows above, 3 below */\
    src -= 2*srcStride;\
    for(row=0; row<4+5; row++){\
        for(col=0; col<4; col++){\
            tmp[col]= 20*(src[col]+src[col+1]) - 5*(src[col-1]+src[col+2]) + (src[col-2]+src[col+3]);\
        }\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    /* rewind to the third stored row, i.e. the row matching dst row 0 */\
    tmp -= tmpStride*(4+5-2);\
    for(col=0; col<4; col++){\
        int tap[4+5]; /* tap[n] is the intermediate (n-2) rows below tmp */\
        for(n=0; n<4+5; n++){\
            tap[n]= tmp[(n-2)*tmpStride];\
        }\
        for(n=0; n<4; n++){\
            OP2(dst[n*dstStride], 20*(tap[n+2]+tap[n+3]) - 5*(tap[n+1]+tap[n+4]) + (tap[n]+tap[n+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\
\
/* 8x8 block, kernel applied along each row. */\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col;\
    for(row=0; row<8; row++){\
        for(col=0; col<8; col++){\
            OP(dst[col], 20*(src[col]+src[col+1]) - 5*(src[col-1]+src[col+2]) + (src[col-2]+src[col+3]));\
        }\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* 8x8 block, kernel applied down each column. */\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col, n;\
    for(col=0; col<8; col++){\
        int tap[8+5]; /* tap[n] is the sample (n-2) rows below src */\
        for(n=0; n<8+5; n++){\
            tap[n]= src[(n-2)*srcStride];\
        }\
        for(n=0; n<8; n++){\
            OP(dst[n*dstStride], 20*(tap[n+2]+tap[n+3]) - 5*(tap[n+1]+tap[n+4]) + (tap[n]+tap[n+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
/* 8x8 block, rows filtered into tmp first, then tmp filtered down columns. */\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col, n;\
    /* start two rows early: the vertical pass needs 2 rows above, 3 below */\
    src -= 2*srcStride;\
    for(row=0; row<8+5; row++){\
        for(col=0; col<8; col++){\
            tmp[col]= 20*(src[col]+src[col+1]) - 5*(src[col-1]+src[col+2]) + (src[col-2]+src[col+3]);\
        }\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    /* rewind to the third stored row, i.e. the row matching dst row 0 */\
    tmp -= tmpStride*(8+5-2);\
    for(col=0; col<8; col++){\
        int tap[8+5]; /* tap[n] is the intermediate (n-2) rows below tmp */\
        for(n=0; n<8+5; n++){\
            tap[n]= tmp[(n-2)*tmpStride];\
        }\
        for(n=0; n<8; n++){\
            OP2(dst[n*dstStride], 20*(tap[n+2]+tap[n+3]) - 5*(tap[n+1]+tap[n+4]) + (tap[n]+tap[n+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\
\
/* 16x16 = four 8x8 quadrants: top-left, top-right, bottom-left, bottom-right. */\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst                  , src                  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst               + 8, src               + 8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst + 8*dstStride    , src + 8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst + 8*dstStride + 8, src + 8*srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst                  , src                  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst               + 8, src               + 8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst + 8*dstStride    , src + 8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst + 8*dstStride + 8, src + 8*srcStride + 8, dstStride, srcStride);\
}\
\
/* The bottom quadrants reuse the same tmp areas as the top ones. */\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst                  , tmp    , src                  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst               + 8, tmp + 8, src               + 8, dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst + 8*dstStride    , tmp    , src + 8*srcStride    , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst + 8*dstStride + 8, tmp + 8, src + 8*srcStride + 8, dstStride, tmpStride, srcStride);\
}

/**
 * Generates the sixteen OPNAME##h264_qpel##SIZE##_mcXY_c entry points for a
 * SIZE x SIZE block.
 *
 * Judging from the operations below, X and Y are the horizontal and vertical
 * offsets in quarter samples: 2 selects the lowpass-filtered plane in that
 * direction, while 1 and 3 combine two planes through
 * OPNAME##pixels##SIZE##_l2 (defined outside this macro; presumably it merges
 * its two sources into dst -- confirm at its definition).
 *
 * Intermediate planes are always produced with the put_ filters; OPNAME only
 * selects the final store into dst. Vertical filtering works on a private,
 * tightly packed copy of the source that includes 2 extra rows above and 3
 * below the block.
 *
 * @param OPNAME prefix of the generated functions and of the final store
 * @param SIZE   block width and height; used with 4, 8 and 16
 */
#define H264_MC(OPNAME, SIZE) \
/* (0,0): source samples taken as they are */\
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
/* (1,0): source combined with the horizontal lowpass plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, hfilt, stride, stride, SIZE, SIZE);\
}\
\
/* (2,0): horizontal lowpass plane only */\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
/* (3,0): source one column to the right combined with the horizontal plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, hfilt, stride, stride, SIZE, SIZE);\
}\
\
/* (0,1): source combined with the vertical lowpass plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE]; /* row 0 of the block inside padded */\
    uint8_t vfilt[SIZE*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (0,2): vertical lowpass plane only */\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, centre, stride, SIZE);\
}\
\
/* (0,3): source one row down combined with the vertical lowpass plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre+SIZE, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (1,1): horizontal plane of this row + vertical plane of this column */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (3,1): horizontal plane of this row + vertical plane of the next column */\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (1,3): horizontal plane of the next row + vertical plane of this column */\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (3,3): horizontal plane of the next row + vertical plane of the next column */\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (2,2): two-dimensional lowpass plane only */\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, scratch, src, stride, SIZE, stride);\
}\
\
/* (2,1): horizontal plane of this row + two-dimensional plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (2,3): horizontal plane of the next row + two-dimensional plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (1,2): vertical plane of this column + two-dimensional plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (3,2): vertical plane of the next column + two-dimensional plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const centre= &padded[2*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}

2325#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2326//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2327#define op_put(a, b) a = cm[((b) + 16)>>5]
2328#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2329#define op2_put(a, b) a = cm[((b) + 512)>>10]
2330
2331H264_LOWPASS(put_ , op_put, op2_put)
2332H264_LOWPASS(avg_ , op_avg, op2_avg)
2333H264_MC(put_, 4)
2334H264_MC(put_, 8)
2335H264_MC(put_, 16)
2336H264_MC(avg_, 4)
2337H264_MC(avg_, 8)
2338H264_MC(avg_, 16)
2339
2340#undef op_avg
2341#undef op_put
2342#undef op2_avg
2343#undef op2_put
2344#endif
2345
1457ab52
MN
2346static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2347 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2348 int i;
2349
2350 for(i=0; i<h; i++){
2351 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2352 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2353 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2354 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2355 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2356 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2357 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2358 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2359 dst+=dstStride;
2360 src+=srcStride;
2361 }
2362}
2363
2364static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2365 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2366 int i;
2367
2368 for(i=0; i<w; i++){
2369 const int src_1= src[ -srcStride];
2370 const int src0 = src[0 ];
2371 const int src1 = src[ srcStride];
2372 const int src2 = src[2*srcStride];
2373 const int src3 = src[3*srcStride];
2374 const int src4 = src[4*srcStride];
2375 const int src5 = src[5*srcStride];
2376 const int src6 = src[6*srcStride];
2377 const int src7 = src[7*srcStride];
2378 const int src8 = src[8*srcStride];
2379 const int src9 = src[9*srcStride];
2380 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2381 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2382 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2383 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2384 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2385 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2386 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2387 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2388 src++;
2389 dst++;
2390 }
2391}
2392
/** mspel position (0,0): hands the 8-row block straight to put_pixels8_c. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    const int rows= 8;

    put_pixels8_c(dst, src, stride, rows);
}
2396
/**
 * mspel position (1,0): the horizontally filtered 8x8 block is combined with
 * the unfiltered source through put_pixels8_l2.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hfilt[8*8];

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2(dst, src, hfilt, stride, stride, 8, 8);
}
2402
/** Position (2,0): writes the horizontally filtered 8x8 block straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    const int rows= 8;

    wmv2_mspel8_h_lowpass(dst, src, stride, stride, rows);
}
2406
/**
 * Position (3,0): horizontally filters the 8x8 block into a scratch buffer,
 * then hands the source shifted right by one sample and the filtered block
 * to put_pixels8_l2.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, filtered, stride, stride, 8, 8);
}
2412
/** Position (0,2): writes the vertically filtered 8-column block straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    const int columns= 8;

    wmv2_mspel8_v_lowpass(dst, src, stride, stride, columns);
}
2416
/**
 * Position (1,2): builds a vertically filtered block and a
 * horizontally-then-vertically filtered block, and passes both to
 * put_pixels8_l2.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t rowsH[11*8]; /* 11 horizontally filtered rows, starting one row above src */
    uint8_t blockV[8*8];
    uint8_t blockHV[8*8];

    wmv2_mspel8_h_lowpass(rowsH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(blockV, src, 8, stride, 8);
    /* rowsH+8 is the row matching src; the vertical filter also reads the row above it */
    wmv2_mspel8_v_lowpass(blockHV, rowsH+8, 8, 8, 8);
    put_pixels8_l2(dst, blockV, blockHV, stride, 8, 8, 8);
}
/**
 * Position (3,2): like put_mspel8_mc12_c, but the vertically filtered block is
 * taken from the source shifted right by one sample.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t rowsH[11*8]; /* 11 horizontally filtered rows, starting one row above src */
    uint8_t blockV[8*8];
    uint8_t blockHV[8*8];

    wmv2_mspel8_h_lowpass(rowsH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(blockV, src+1, 8, stride, 8);
    /* rowsH+8 is the row matching src; the vertical filter also reads the row above it */
    wmv2_mspel8_v_lowpass(blockHV, rowsH+8, 8, 8, 8);
    put_pixels8_l2(dst, blockV, blockHV, stride, 8, 8, 8);
}
/**
 * Position (2,2): horizontal filter into a scratch buffer followed by the
 * vertical filter written directly to dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t rowsH[11*8]; /* 11 horizontally filtered rows, starting one row above src */

    wmv2_mspel8_h_lowpass(rowsH, src-stride, 8, stride, 11);
    /* rowsH+8 is the row matching src; the vertical filter also reads the row above it */
    wmv2_mspel8_v_lowpass(dst, rowsH+8, stride, 8, 8);
}
2440
332f9ac4
MN
2441static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
2442 int x;
2443 const int strength= ff_h263_loop_filter_strength[qscale];
2444
2445 for(x=0; x<8; x++){
2446 int d1, d2, ad1;
2447 int p0= src[x-2*stride];
2448 int p1= src[x-1*stride];
2449 int p2= src[x+0*stride];
2450 int p3= src[x+1*stride];
2451 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2452
2453 if (d<-2*strength) d1= 0;
2454 else if(d<- strength) d1=-2*strength - d;
2455 else if(d< strength) d1= d;
2456 else if(d< 2*strength) d1= 2*strength - d;
2457 else d1= 0;
2458
2459 p1 += d1;
2460 p2 -= d1;
2461 if(p1&256) p1= ~(p1>>31);
2462 if(p2&256) p2= ~(p2>>31);
2463
2464 src[x-1*stride] = p1;
2465 src[x+0*stride] = p2;
2466
5b5404e3 2467 ad1= ABS(d1)>>1;
332f9ac4
MN
2468
2469 d2= clip((p0-p3)/4, -ad1, ad1);
2470
2471 src[x-2*stride] = p0 - d2;
2472 src[x+ stride] = p3 + d2;
2473 }
2474}
2475
2476static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
2477 int y;
2478 const int strength= ff_h263_loop_filter_strength[qscale];
2479
2480 for(y=0; y<8; y++){
2481 int d1, d2, ad1;
2482 int p0= src[y*stride-2];
2483 int p1= src[y*stride-1];
2484 int p2= src[y*stride+0];
2485 int p3= src[y*stride+1];
2486 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2487
2488 if (d<-2*strength) d1= 0;
2489 else if(d<- strength) d1=-2*strength - d;
2490 else if(d< strength) d1= d;
2491 else if(d< 2*strength) d1= 2*strength - d;
2492 else d1= 0;
2493
2494 p1 += d1;
2495 p2 -= d1;
2496 if(p1&256) p1= ~(p1>>31);
2497 if(p2&256) p2= ~(p2>>31);
2498
2499 src[y*stride-1] = p1;
2500 src[y*stride+0] = p2;
2501
2502 ad1= ABS(d1)>>1;
2503
2504 d2= clip((p0-p3)/4, -ad1, ad1);
2505
2506 src[y*stride-2] = p0 - d2;
2507 src[y*stride+1] = p3 + d2;
2508 }
2509}
1457ab52 2510
fdbbf2e0
MN
/**
 * In-place separable (1,2,1) smoothing of an 8x8 block.
 * The first and last rows skip the vertical pass and the first and last
 * columns skip the horizontal pass (they are only rescaled), so no sample
 * outside the block is read.
 * @param src    top-left sample of the block
 * @param stride distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vfilt[8*8];
    int row, col;

    /* vertical pass into vfilt; every entry ends up scaled by 4 */
    for(row=0; row<8; row++){
        const uint8_t * const cur= src + row*stride;
        int * const out= vfilt + row*8;
        const int is_edge= (row==0 || row==7);

        for(col=0; col<8; col++){
            if(is_edge)
                out[col]= 4*cur[col];
            else
                out[col]= cur[col - stride] + 2*cur[col] + cur[col + stride];
        }
    }

    /* horizontal pass back into src with rounding */
    for(row=0; row<8; row++){
        const int * const in= vfilt + row*8;
        uint8_t * const out= src + row*stride;

        out[0]= (in[0] + 2)>>2;
        out[7]= (in[7] + 2)>>2;
        for(col=1; col<7; col++)
            out[col]= (in[col-1] + 2*in[col] + in[col+1] + 8)>>4;
    }
}
2537
/**
 * Sum of absolute differences between two 16-pixel-wide areas.
 * @param v         unused
 * @param pix1      first area
 * @param pix2      second area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2565
/**
 * Sum of absolute differences between a 16-wide area and the avg2() of each
 * reference pixel with its right neighbour (17 reference columns are read).
 * @param v         unused
 * @param pix1      area being compared
 * @param pix2      reference area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2593
/**
 * Sum of absolute differences between a 16-wide area and the avg2() of each
 * reference pixel with the one directly below it (h+1 reference rows are read).
 * @param v         unused
 * @param pix1      area being compared
 * @param pix2      reference area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        const uint8_t * const below= pix2 + line_size;

        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2623
/**
 * Sum of absolute differences between a 16-wide area and the avg4() of each
 * 2x2 reference neighbourhood (17 columns and h+1 rows of the reference are read).
 * @param v         unused
 * @param pix1      area being compared
 * @param pix2      reference area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        const uint8_t * const below= pix2 + line_size;

        for(col=0; col<16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2653
/**
 * Sum of absolute differences between two 8-pixel-wide areas.
 * @param v         unused
 * @param pix1      first area
 * @param pix2      second area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2673
/**
 * Sum of absolute differences between an 8-wide area and the avg2() of each
 * reference pixel with its right neighbour (9 reference columns are read).
 * @param v         unused
 * @param pix1      area being compared
 * @param pix2      reference area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2693
/**
 * Sum of absolute differences between an 8-wide area and the avg2() of each
 * reference pixel with the one directly below it (h+1 reference rows are read).
 * @param v         unused
 * @param pix1      area being compared
 * @param pix2      reference area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        const uint8_t * const below= pix2 + line_size;

        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2715
/**
 * Sum of absolute differences between an 8-wide area and the avg4() of each
 * 2x2 reference neighbourhood (9 columns and h+1 rows of the reference are read).
 * @param v         unused
 * @param pix1      area being compared
 * @param pix2      reference area
 * @param line_size distance between rows, used for both areas
 * @param h         number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad= 0;
    int row, col;

    for(row=0; row<h; row++){
        const uint8_t * const below= pix2 + line_size;

        for(col=0; col<8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2737
d4c5d2ad 2738static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
e6a2ac34
MN
2739 int score1=0;
2740 int score2=0;
2741 int x,y;
d4c5d2ad 2742
e6a2ac34
MN
2743 for(y=0; y<h; y++){
2744 for(x=0; x<16; x++){
2745 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
2746 }
2747 if(y+1<h){
2748 for(x=0; x<15; x++){
2749 score2+= ABS( s1[x ] - s1[x +stride]
2750 - s1[x+1] + s1[x+1+stride])
2751 -ABS( s2[x ] - s2[x +stride]
2752 - s2[x+1] + s2[x+1+stride]);
2753 }
2754 }
2755 s1+= stride;
2756 s2+= stride;
2757 }
d4c5d2ad
MN
2758
2759 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
2760 else return score1 + ABS(score2)*8;
e6a2ac34
MN
2761}
2762
d4c5d2ad 2763static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
e6a2ac34
MN
2764 int score1=0;
2765 int score2=0;
2766 int x,y;
2767
2768 for(y=0; y<h; y++){
2769 for(x=0; x<8; x++){
2770 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
2771 }
2772 if(y+1<h){
2773 for(x=0; x<7; x++){
2774 score2+= ABS( s1[x ] - s1[x +stride]
2775 - s1[x+1] + s1[x+1+stride])
2776 -ABS( s2[x ] - s2[x +stride]
2777 - s2[x+1] + s2[x+1+stride]);
2778 }
2779 }
2780 s1+= stride;
2781 s2+= stride;
2782 }
2783
d4c5d2ad
MN
2784 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
2785 else return score1 + ABS(score2)*8;
e6a2ac34
MN
2786}
2787
364a1797
MN
2788static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
2789 int i;
2790 unsigned int sum=0;
2791
2792 for(i=0; i<8*8; i++){
2793 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
2794 int w= weight[i];
2795 b>>= RECON_SHIFT;
2796 assert(-512<b && b<512);
2797
2798 sum += (w*b)*(w*b)>>4;
2799 }
2800 return sum>>2;
2801}
2802
2803static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
2804 int i;
2805
2806 for(i=0; i<8*8; i++){
2807 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
2808 }
2809}
2810
a9badb51
MN
2811/**
2812 * permutes an 8x8 block.
2a5700de 2813 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
2814 * @param permutation the permutation vector
2815 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
2a5700de
MN
2816 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2817 * (inverse) permutated to scantable order!
a9badb51 2818 */
0c1a9eda 2819void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 2820{
7801d21d 2821 int i;
477ab036 2822 DCTELEM temp[64];
7801d21d
MN
2823
2824 if(last<=0) return;
9a7b310d 2825 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
d962f6fd 2826
7801d21d
MN
2827 for(i=0; i<=last; i++){
2828 const int j= scantable[i];
2829 temp[j]= block[j];
2830 block[j]=0;
2831 }
2832
2833 for(i=0; i<=last; i++){
2834 const int j= scantable[i];
2835 const int perm_j= permutation[j];
2836 block[perm_j]= temp[j];
2837 }
d962f6fd 2838}
e0eac44e 2839
622348f9
MN
/** Comparison function that ignores all of its arguments and always scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    const int score= 0;

    return score;
}
2843
2844void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
2845 int i;
2846
2847 memset(cmp, 0, sizeof(void*)*5);
2848
2849 for(i=0; i<5; i++){
2850 switch(type&0xFF){
2851 case FF_CMP_SAD:
2852 cmp[i]= c->sad[i];
2853 break;
2854 case FF_CMP_SATD:
2855 cmp[i]= c->hadamard8_diff[i];
2856 break;
2857 case FF_CMP_SSE:
2858 cmp[i]= c->sse[i];
2859 break;
2860 case FF_CMP_DCT:
2861 cmp[i]= c->dct_sad[i];
2862 break;
2863 case FF_CMP_PSNR:
2864 cmp[i]= c->quant_psnr[i];
2865 break;
2866 case FF_CMP_BIT:
2867 cmp[i]= c->bit[i];
2868 break;
2869 case FF_CMP_RD:
2870 cmp[i]= c->rd[i];
2871 break;
2872 case FF_CMP_VSAD:
2873 cmp[i]= c->vsad[i];
2874 break;
2875 case FF_CMP_VSSE:
2876 cmp[i]= c->vsse[i];
2877 break;
2878 case FF_CMP_ZERO:
2879 cmp[i]= zero_cmp;
2880 break;
e6a2ac34
MN
2881 case FF_CMP_NSSE:
2882 cmp[i]= c->nsse[i];
2883 break;
26efc54e
MN
2884 case FF_CMP_W53:
2885 cmp[i]= c->w53[i];
2886 break;
2887 case FF_CMP_W97:
2888 cmp[i]= c->w97[i];
2889 break;
622348f9
MN
2890 default:
2891 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
2892 }
2893 }
2894}
2895
2a5700de
MN
2896/**
2897 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
2898 */
eb4b3dd3 2899static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
2900{
2901 memset(blocks, 0, sizeof(DCTELEM)*6*64);
2902}
2903
11f18faf
MN
/**
 * Adds src to dst byte by byte; each sum wraps modulo 256.
 * @param dst destination, updated in place
 * @param src bytes to add
 * @param w   number of bytes
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int pos;

    for(pos=0; pos<w; pos++)
        dst[pos] += src[pos];
}
2919
/**
 * Stores src1 - src2 byte by byte; each difference wraps modulo 256.
 * @param dst  destination
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int pos;

    for(pos=0; pos<w; pos++)
        dst[pos] = src1[pos]-src2[pos];
}
2935
84705403
MN
/**
 * Stores the residual of src2 against a median predictor.
 * For each position the predictor is mid_pred() of the left value, src1[i]
 * and (left + src1[i] - left_top) reduced to 8 bits; afterwards src1[i]
 * becomes the new left_top and src2[i] the new left.
 * @param dst      receives src2[i] - prediction
 * @param src1     neighbour samples feeding the predictor
 * @param src2     samples being predicted
 * @param w        number of samples
 * @param left     in: initial left value, out: last src2 sample processed
 * @param left_top in: initial left-top value, out: last src1 sample processed
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur_left= *left;
    uint8_t cur_left_top= *left_top;
    int pos;

    for(pos=0; pos<w; pos++){
        const int pred= mid_pred(cur_left, src1[pos], (cur_left + src1[pos] - cur_left_top)&0xFF);

        cur_left_top= src1[pos];
        cur_left= src2[pos];
        dst[pos]= cur_left - pred;
    }

    *left= cur_left;
    *left_top= cur_left_top;
}
2953
1457ab52
MN
/* o1 = i1+i2, o2 = i1-i2; wrapped in do/while(0) so it acts as ONE statement
   (safe under an unbraced if/else). The inputs are evaluated twice. */
#define BUTTERFLY2(o1,o2,i1,i2) \
do{\
    o1= (i1)+(i2);\
    o2= (i1)-(i2);\
}while(0)

/* in-place butterfly: x becomes x+y and y becomes x-y (using the old values) */
#define BUTTERFLY1(x,y) \
do{\
    int bf1_a_, bf1_b_;\
    bf1_a_= x;\
    bf1_b_= y;\
    x= bf1_a_+bf1_b_;\
    y= bf1_a_-bf1_b_;\
}while(0)

/* |x+y| + |x-y|: the final butterfly stage folded into the absolute sum */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
2968
/**
 * Sum of absolute 8x8 Hadamard-transformed differences (src - dst).
 * @param s      unused
 * @param dst    first block
 * @param src    second block
 * @param stride distance between rows, used for both blocks
 * @param h      must be 8
 * @return sum of the absolute transform coefficients
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum=0;
    int i;

    assert(h==8);

    /* horizontal transform of every row of the difference */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        int * const t= temp + 8*i;
        const uint8_t * const cur= src + stride*i;
        const uint8_t * const ref= dst + stride*i;

        BUTTERFLY2(t[0], t[1], cur[0]-ref[0], cur[1]-ref[1]);
        BUTTERFLY2(t[2], t[3], cur[2]-ref[2], cur[3]-ref[3]);
        BUTTERFLY2(t[4], t[5], cur[4]-ref[4], cur[5]-ref[5]);
        BUTTERFLY2(t[6], t[7], cur[6]-ref[6], cur[7]-ref[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* vertical transform of every column; the last stage is folded into the absolute sum */
    for(i=0; i<8; i++){
        int * const t= temp + i;

        BUTTERFLY1(t[8*0], t[8*1]);
        BUTTERFLY1(t[8*2], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*5]);
        BUTTERFLY1(t[8*6], t[8*7]);

        BUTTERFLY1(t[8*0], t[8*2]);
        BUTTERFLY1(t[8*1], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*6]);
        BUTTERFLY1(t[8*5], t[8*7]);

        sum += BUTTERFLYA(t[8*0], t[8*4])
              +BUTTERFLYA(t[8*1], t[8*5])
              +BUTTERFLYA(t[8*2], t[8*6])
              +BUTTERFLYA(t[8*3], t[8*7]);
    }

    return sum;
}
3020
/**
 * Sum of absolute 8x8 Hadamard transform coefficients of a single block,
 * with the contribution of the mean removed at the end.
 * @param s      unused
 * @param src    block to analyse
 * @param dummy  unused
 * @param stride distance between rows
 * @param h      must be 8
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum=0;
    int i;

    assert(h==8);

    /* horizontal transform of every row */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        int * const t= temp + 8*i;
        const uint8_t * const cur= src + stride*i;

        BUTTERFLY2(t[0], t[1], cur[0], cur[1]);
        BUTTERFLY2(t[2], t[3], cur[2], cur[3]);
        BUTTERFLY2(t[4], t[5], cur[4], cur[5]);
        BUTTERFLY2(t[6], t[7], cur[6], cur[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* vertical transform of every column; the last stage is folded into the absolute sum */
    for(i=0; i<8; i++){
        int * const t= temp + i;

        BUTTERFLY1(t[8*0], t[8*1]);
        BUTTERFLY1(t[8*2], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*5]);
        BUTTERFLY1(t[8*6], t[8*7]);

        BUTTERFLY1(t[8*0], t[8*2]);
        BUTTERFLY1(t[8*1], t[8*3]);
        BUTTERFLY1(t[8*4], t[8*6]);
        BUTTERFLY1(t[8*5], t[8*7]);

        sum += BUTTERFLYA(t[8*0], t[8*4])
              +BUTTERFLYA(t[8*1], t[8*5])
              +BUTTERFLYA(t[8*2], t[8*6])
              +BUTTERFLYA(t[8*3], t[8*7]);
    }

    sum -= ABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3068
bb198e19 3069static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 3070 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
3071 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3072 DCTELEM * const temp= (DCTELEM*)aligned_temp;
1457ab52 3073 int sum=0, i;
bb198e19
MN
3074
3075 assert(h==8);
1457ab52
MN
3076
3077 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 3078 s->dsp.fdct(temp);
1457ab52
MN
3079
3080 for(i=0; i<64; i++)
3081 sum+= ABS(temp[i]);
3082
3083 return sum;
3084}
3085
0e15384d 3086void simple_idct(DCTELEM *block); //FIXME
1457ab52 3087
bb198e19 3088static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 3089 MpegEncContext * const s= (MpegEncContext *)c;
76fbb024
MN
3090 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
3091 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3092 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
1457ab52
MN
3093 int sum=0, i;
3094
bb198e19 3095 assert(h==8);
1457ab52
MN
3096 s->mb_intra=0;
3097
3098 s->dsp.diff_pixels(temp, src1, src2, stride);
3099
3100 memcpy(bak, temp, 64*sizeof(DCTELEM));
3101
67725183 3102 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
d50635cd 3103 s->dct_unquantize_inter(s, temp, 0, s->qscale);
1457ab52
MN
3104 simple_idct(temp); //FIXME
3105
3106 for(i=0; i<64; i++)
3107 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
3108
3109 return sum;
3110}
3111
bb198e19 3112static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 3113 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 3114 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
3115 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3116 uint64_t __align8 aligned_bak[stride];
3117 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3118 uint8_t * const bak= (uint8_t*)aligned_bak;
3a87ac94
MN
3119 int i, last, run, bits, level, distoration, start_i;
3120 const int esc_length= s->ac_esc_length;
3121 uint8_t * length;
3122 uint8_t * last_length;
67725183 3123
bb198e19
MN
3124 assert(h==8);
3125
67725183
MN
3126 for(i=0; i<8; i++){
3127 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
3128 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
3129 }
3a87ac94 3130
67725183
MN
3131 s->dsp.diff_pixels(temp, src1, src2, stride);
3132
3133 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3134
3135 bits=0;
3a87ac94
MN
3136
3137 if (s->mb_intra) {
67725183 3138 start_i = 1;
3a87ac94
MN
3139 length = s->intra_ac_vlc_length;
3140 last_length= s->intra_ac_vlc_last_length;
67725183 3141 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
3142 } else {
3143 start_i = 0;
3144 length = s->inter_ac_vlc_length;
3145 last_length= s->inter_ac_vlc_last_length;
3146 }
3a87ac94 3147
67725183 3148 if(last>=start_i){
3a87ac94
MN
3149 run=0;
3150 for(i=start_i; i<last; i++){
3151 int j= scantable[i];
3152 level= temp[j];
3153
3154 if(level){
3155 level+=64;
3156 if((level&(~127)) == 0){
3157 bits+= length[UNI_AC_ENC_INDEX(run, level)];
3158 }else
3159 bits+= esc_length;
3160 run=0;
3161 }else
3162 run++;
3163 }
3164 i= scantable[last];
1d0eab1d 3165
3a87ac94 3166 level= temp[i] + 64;
1d0eab1d
MN
3167
3168 assert(level - 64);
3169
3a87ac94
MN
3170 if((level&(~127)) == 0){
3171 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3172 }else
3173 bits+= esc_length;
3174
67725183
MN
3175 }
3176
3177 if(last>=0){
d50635cd
MN
3178 if(s->mb_intra)
3179 s->dct_unquantize_intra(s, temp, 0, s->qscale);
3180 else
3181 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3a87ac94
MN
3182 }
3183
b0368839 3184 s->dsp.idct_add(bak, stride, temp);
3a87ac94 3185
bb198e19 3186 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
3a87ac94 3187
67725183 3188 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
3189}
3190
bb198e19 3191static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 3192 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 3193 const uint8_t *scantable= s->intra_scantable.permutated;
76fbb024
MN
3194 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3195 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3a87ac94
MN
3196 int i, last, run, bits, level, start_i;
3197 const int esc_length= s->ac_esc_length;
3198 uint8_t * length;
3199 uint8_t * last_length;
bb198e19
MN
3200
3201 assert(h==8);
67725183
MN
3202
3203 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 3204
67725183
MN
3205 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3206
3207 bits=0;
3a87ac94
MN
3208
3209 if (s->mb_intra) {
67725183 3210 start_i = 1;
3a87ac94
MN
3211 length = s->intra_ac_vlc_length;
3212 last_length= s->intra_ac_vlc_last_length;
67725183 3213 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
3214 } else {
3215 start_i = 0;
3216 length = s->inter_ac_vlc_length;
3217 last_length= s->inter_ac_vlc_last_length;
3218 }
3a87ac94 3219
67725183 3220 if(last>=start_i){
3a87ac94
MN
3221 run=0;
3222 for(i=start_i; i<last; i++){
3223 int j= scantable[i];
3224 level= temp[j];
3225
3226 if(level){
3227 level+=64;
3228 if((level&(~127)) == 0){
3229 bits+= length[UNI_AC_ENC_INDEX(run, level)];
3230 }else
3231 bits+= esc_length;
3232 run=0;
3233 }else
3234 run++;
3235 }
3236 i= scantable[last];
67725183
MN
3237
3238 level= temp[i] + 64;
3a87ac94 3239
67725183 3240 assert(level - 64);
3a87ac94 3241
3a87ac94
MN
3242 if((level&(~127)) == 0){
3243 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3244 }else
3245 bits+= esc_length;
3246 }
3247
3248 return bits;
3249}
3250
622348f9
MN
/**
 * Sum of absolute differences between vertically adjacent samples of a
 * 16-wide area (h-1 row pairs).
 * @param c      unused
 * @param s      area to analyse
 * @param dummy  unused
 * @param stride distance between rows
 * @param h      number of rows
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int row, col;

    for(row=1; row<h; row++){
        for(col=0; col<16; col++)
            score+= ABS(s[col] - s[col+stride]);
        s+= stride;
    }

    return score;
}
3265
/**
 * Sum of absolute vertical differences of the residual s1 - s2 over a
 * 16-wide area (h-1 row pairs).
 * @param c      unused
 * @param s1     first area
 * @param s2     second area
 * @param stride distance between rows, used for both areas
 * @param h      number of rows
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int row, col;

    for(row=1; row<h; row++){
        const uint8_t * const next1= s1 + stride;
        const uint8_t * const next2= s2 + stride;

        for(col=0; col<16; col++)
            score+= ABS(s1[col] - s2[col] - next1[col] + next2[col]);
        s1+= stride;
        s2+= stride;
    }

    return score;
}
3280
#define SQ(a) ((a)*(a))
/**
 * Sum of squared differences between vertically adjacent samples of a
 * 16-wide area (h-1 row pairs).
 * @param c      unused
 * @param s      area to analyse
 * @param dummy  unused
 * @param stride distance between rows
 * @param h      number of rows
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int row, col;

    for(row=1; row<h; row++){
        for(col=0; col<16; col++)
            score+= SQ(s[col] - s[col+stride]);
        s+= stride;
    }

    return score;
}
3296
/**
 * Sum of squared vertical differences of the residual s1 - s2 over a
 * 16-wide area (h-1 row pairs).
 * @param c      unused
 * @param s1     first area
 * @param s2     second area
 * @param stride distance between rows, used for both areas
 * @param h      number of rows
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int row, col;

    for(row=1; row<h; row++){
        const uint8_t * const next1= s1 + stride;
        const uint8_t * const next2= s2 + stride;

        for(col=0; col<16; col++)
            score+= SQ(s1[col] - s2[col] - next1[col] + next2[col]);
        s1+= stride;
        s2+= stride;
    }

    return score;
}
3311
bb198e19 3312WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
622348f9 3313WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
bb198e19
MN
3314WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
3315WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
3316WARPER8_16_SQ(rd8x8_c, rd16_c)
3317WARPER8_16_SQ(bit8x8_c, bit16_c)
1457ab52 3318
b0368839
MN
/* XXX: these functions should be removed as soon as all IDCTs have been
   converted */
3321static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
3322{
3323 j_rev_dct (block);
3324 put_pixels_clamped_c(block, dest, line_size);
3325}
3326static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
3327{
3328 j_rev_dct (block);
3329 add_pixels_clamped_c(block, dest, line_size);
3330}
3331
178fcca8
MN
3332static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
3333{
3334 j_rev_dct4 (block);
3335 put_pixels_clamped4_c(block, dest, line_size);
3336}
3337static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
3338{
3339 j_rev_dct4 (block);
3340 add_pixels_clamped4_c(block, dest, line_size);
3341}
3342
59cf08ce
FB
3343/* init static data */
3344void dsputil_static_init(void)
e0eac44e 3345{
d2975f8d 3346 int i;
e0eac44e 3347
59cf08ce
FB
3348 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
3349 for(i=0;i<MAX_NEG_CROP;i++) {
3350 cropTbl[i] = 0;
3351 cropTbl[i + MAX_NEG_CROP + 256] = 255;
3352 }
3353
3354 for(i=0;i<512;i++) {
3355 squareTbl[i] = (i - 256) * (i - 256);
3356 }
3357
3358 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
3359}
92ddb692 3360
92ddb692 3361
59cf08ce
FB
3362void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3363{
3364 int i;
de6d9b64 3365
b0368839 3366#ifdef CONFIG_ENCODERS
10acc479 3367 if(avctx->dct_algo==FF_DCT_FASTINT) {
b0368839 3368 c->fdct = fdct_ifast;
48b1f800 3369 c->fdct248 = fdct_ifast248;
10acc479
RS
3370 }
3371 else if(avctx->dct_algo==FF_DCT_FAAN) {
65e4c8c9 3372 c->fdct = ff_faandct;
48b1f800 3373 c->fdct248 = ff_faandct248;
10acc479
RS
3374 }
3375 else {
b0368839 3376 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
10acc479
RS
3377 c->fdct248 = ff_fdct248_islow;
3378 }
b0368839
MN
3379#endif //CONFIG_ENCODERS
3380
178fcca8
MN
3381 if(avctx->lowres==1){
3382 c->idct_put= ff_jref_idct4_put;
3383 c->idct_add= ff_jref_idct4_add;
3384 c->idct = j_rev_dct4;
b0368839 3385 c->idct_permutation_type= FF_NO_IDCT_PERM;
178fcca8
MN
3386 }else{
3387 if(avctx->idct_algo==FF_IDCT_INT){
3388 c->idct_put= ff_jref_idct_put;
3389 c->idct_add= ff_jref_idct_add;
3390 c->idct = j_rev_dct;
3391 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
3392 }else{ //accurate/default
3393 c->idct_put= simple_idct_put;
3394 c->idct_add= simple_idct_add;
3395 c->idct = simple_idct;
3396 c->idct_permutation_type= FF_NO_IDCT_PERM;
3397 }
b0368839
MN
3398 }
3399
44cb64ee
MM
3400 /* VP3 DSP support */
3401 c->vp3_dsp_init = vp3_dsp_init_c;
116824d0 3402 c->vp3_idct = vp3_idct_c;
44cb64ee 3403
eb4b3dd3
ZK
3404 c->get_pixels = get_pixels_c;
3405 c->diff_pixels = diff_pixels_c;
3406 c->put_pixels_clamped = put_pixels_clamped_c;
f9ed9d85 3407 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
eb4b3dd3
ZK
3408 c->add_pixels_clamped = add_pixels_clamped_c;
3409 c->gmc1 = gmc1_c;
3410 c->gmc = gmc_c;
3411 c->clear_blocks = clear_blocks_c;
3412 c->pix_sum = pix_sum_c;
3413 c->pix_norm1 = pix_norm1_c;
3414
45553457 3415 /* TODO [0] 16 [1] 8 */
bb198e19
MN
3416 c->pix_abs[0][0] = pix_abs16_c;
3417 c->pix_abs[0][1] = pix_abs16_x2_c;
3418 c->pix_abs[0][2] = pix_abs16_y2_c;
3419 c->pix_abs[0][3] = pix_abs16_xy2_c;
3420 c->pix_abs[1][0] = pix_abs8_c;
3421 c->pix_abs[1][1] = pix_abs8_x2_c;
3422 c->pix_abs[1][2] = pix_abs8_y2_c;
3423 c->pix_abs[1][3] = pix_abs8_xy2_c;
eb4b3dd3 3424
45553457
ZK
3425#define dspfunc(PFX, IDX, NUM) \
3426 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
3427 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
3428 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
3429 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
3430
3431 dspfunc(put, 0, 16);
3432 dspfunc(put_no_rnd, 0, 16);
3433 dspfunc(put, 1, 8);
3434 dspfunc(put_no_rnd, 1, 8);
669ac79c
MN
3435 dspfunc(put, 2, 4);
3436 dspfunc(put, 3, 2);
45553457
ZK
3437
3438 dspfunc(avg, 0, 16);
3439 dspfunc(avg_no_rnd, 0, 16);
3440 dspfunc(avg, 1, 8);
3441 dspfunc(avg_no_rnd, 1, 8);
da3b9756
MM
3442 dspfunc(avg, 2, 4);
3443 dspfunc(avg, 3, 2);
45553457
ZK
3444#undef dspfunc
3445
c0a0170c
MN
3446 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
3447 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
3448
669ac79c
MN
3449 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
3450 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
3451 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
3452 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
3453 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
3454 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
3455 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
3456 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
3457 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
3458
da3b9756
MM
3459 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
3460 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
3461 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
3462 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
3463 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
3464 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
3465 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
3466 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
3467 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
3468
45553457
ZK
3469#define dspfunc(PFX, IDX, NUM) \
3470 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
3471 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
3472 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
3473 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
3474 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
3475 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
3476 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
3477 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
3478 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
3479 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
3480 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
3481 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
3482 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
3483 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
3484 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
3485 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
3486
3487 dspfunc(put_qpel, 0, 16);
3488 dspfunc(put_no_rnd_qpel, 0, 16);
3489
3490 dspfunc(avg_qpel, 0, 16);
3491 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
3492
3493 dspfunc(put_qpel, 1, 8);
3494 dspfunc(put_no_rnd_qpel, 1, 8);
3495
3496 dspfunc(avg_qpel, 1, 8);
3497 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265
MN
3498
3499 dspfunc(put_h264_qpel, 0, 16);
3500 dspfunc(put_h264_qpel, 1, 8);
3501 dspfunc(put_h264_qpel, 2, 4);
3502 dspfunc(avg_h264_qpel, 0, 16);
3503 dspfunc(avg_h264_qpel, 1, 8);
3504 dspfunc(avg_h264_qpel, 2, 4);
3505
45553457 3506#undef dspfunc
0da71265
MN
3507 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
3508 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
3509 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
3510 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
3511 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
3512 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
c9a2ebc4 3513
1457ab52
MN
3514 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
3515 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
3516 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
3517 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
3518 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
3519 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
3520 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3521 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
669ac79c 3522
bb198e19
MN
3523#define SET_CMP_FUNC(name) \
3524 c->name[0]= name ## 16_c;\
3525 c->name[1]= name ## 8x8_c;
3526
3527 SET_CMP_FUNC(hadamard8_diff)
622348f9 3528 c->hadamard8_diff[4]= hadamard8_intra16_c;
bb198e19
MN
3529 SET_CMP_FUNC(dct_sad)
3530 c->sad[0]= pix_abs16_c;
3531 c->sad[1]= pix_abs8_c;
3532 c->sse[0]= sse16_c;
3533 c->sse[1]= sse8_c;
26efc54e 3534 c->sse[2]= sse4_c;
bb198e19
MN
3535 SET_CMP_FUNC(quant_psnr)
3536 SET_CMP_FUNC(rd)
3537 SET_CMP_FUNC(bit)
622348f9
MN
3538 c->vsad[0]= vsad16_c;
3539 c->vsad[4]= vsad_intra16_c;
3540 c->vsse[0]= vsse16_c;
3541 c->vsse[4]= vsse_intra16_c;
e6a2ac34
MN
3542 c->nsse[0]= nsse16_c;
3543 c->nsse[1]= nsse8_c;
26efc54e
MN
3544 c->w53[0]= w53_16_c;
3545 c->w53[1]= w53_8_c;
3546 c->w97[0]= w97_16_c;
3547 c->w97[1]= w97_8_c;
3548
11f18faf
MN
3549 c->add_bytes= add_bytes_c;
3550 c->diff_bytes= diff_bytes_c;
84705403 3551 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
3d2e8cce 3552 c->bswap_buf= bswap_buf;
332f9ac4
MN
3553
3554 c->h263_h_loop_filter= h263_h_loop_filter_c;
3555 c->h263_v_loop_filter= h263_v_loop_filter_c;
364a1797 3556
fdbbf2e0 3557 c->h261_loop_filter= h261_loop_filter_c;
c6148de2 3558
364a1797
MN
3559 c->try_8x8basis= try_8x8basis_c;
3560 c->add_8x8basis= add_8x8basis_c;
11f18faf 3561
980fc7b8 3562#ifdef HAVE_MMX
b0368839 3563 dsputil_init_mmx(c, avctx);
de6d9b64 3564#endif
3d03c0a2 3565#ifdef ARCH_ARMV4L
b0368839 3566 dsputil_init_armv4l(c, avctx);
3d03c0a2 3567#endif
c34270f5 3568#ifdef HAVE_MLIB
b0368839 3569 dsputil_init_mlib(c, avctx);
c34270f5 3570#endif
44f54ceb
MN
3571#ifdef ARCH_SPARC
3572 dsputil_init_vis(c,avctx);
3573#endif
1e98dffb 3574#ifdef ARCH_ALPHA
b0368839 3575 dsputil_init_alpha(c, avctx);
1e98dffb 3576#endif
59925ef2 3577#ifdef ARCH_POWERPC
b0368839 3578 dsputil_init_ppc(c, avctx);
a43bd1d7 3579#endif
d46aba26 3580#ifdef HAVE_MMI
b0368839 3581 dsputil_init_mmi(c, avctx);
d46aba26 3582#endif
0c6bd2ea
B
3583#ifdef ARCH_SH4
3584 dsputil_init_sh4(c,avctx);
3585#endif
43f1708f 3586
b0368839
MN
3587 switch(c->idct_permutation_type){
3588 case FF_NO_IDCT_PERM:
3589 for(i=0; i<64; i++)
3590 c->idct_permutation[i]= i;
3591 break;
3592 case FF_LIBMPEG2_IDCT_PERM:
3593 for(i=0; i<64; i++)
3594 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
3595 break;
3596 case FF_SIMPLE_IDCT_PERM:
3597 for(i=0; i<64; i++)
3598 c->idct_permutation[i]= simple_mmx_permutation[i];
3599 break;
3600 case FF_TRANSPOSE_IDCT_PERM:
3601 for(i=0; i<64; i++)
3602 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
3603 break;
3604 default:
9b879566 3605 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
b0368839 3606 }
57060b1e 3607}
b0368839 3608