dsputil: remove debug message in dsputil_init().
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
406792e7 3 * Copyright (c) 2000, 2001 Fabrice Bellard
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
de6d9b64 5 *
7b94177e
DB
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7 *
2912e87a 8 * This file is part of Libav.
b78e7197 9 *
2912e87a 10 * Libav is free software; you can redistribute it and/or
ff4ec49e
FB
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
b78e7197 13 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 14 *
2912e87a 15 * Libav is distributed in the hope that it will be useful,
de6d9b64 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
de6d9b64 19 *
ff4ec49e 20 * You should have received a copy of the GNU Lesser General Public
2912e87a 21 * License along with Libav; if not, write to the Free Software
5509bffa 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 23 */
115329f1 24
983e3246 25/**
ba87f080 26 * @file
983e3246
MN
27 * DSP utils
28 */
115329f1 29
737eb597 30#include "libavutil/imgutils.h"
de6d9b64
FB
31#include "avcodec.h"
32#include "dsputil.h"
b0368839 33#include "simple_idct.h"
65e4c8c9 34#include "faandct.h"
6f08c541 35#include "faanidct.h"
199436b9 36#include "mathops.h"
af818f7a
DB
37#include "mpegvideo.h"
38#include "config.h"
3da11804
MR
39#include "ac3dec.h"
40#include "vorbis.h"
5596c60c 41
55fde95e 42uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
1d503957 43uint32_t ff_squareTbl[512] = {0, };
de6d9b64 44
19a0729b
OA
45#define BIT_DEPTH 9
46#include "dsputil_template.c"
47#undef BIT_DEPTH
48
49#define BIT_DEPTH 10
50#include "dsputil_template.c"
51#undef BIT_DEPTH
52
53#define BIT_DEPTH 8
325eefa2
OA
54#include "dsputil_template.c"
55
917f55cc
LM
/* Byte-replicated constants sized to unsigned long: every byte of pb_7f is 0x7f
 * and every byte of pb_80 is 0x80 (~0UL / 255 is 0x01 repeated in each byte). */
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)
469bd7b1 59
/* 64-entry zigzag scan table; every index 0..63 appears exactly once. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
70
10acc479
RS
/* Zigzag scan used with the 248 IDCT. Note: unlike the specification,
   the two fields are interleaved here. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
83
2f349de2 84/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84dc2d8a 85DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
2f349de2 86
/* 64-entry alternate (horizontal) scan table; every index 0..63 appears once. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
97
/* 64-entry alternate (vertical) scan table; every index 0..63 appears once. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
108
b0368839
MN
/* Coefficient input order expected by simple_idct_mmx; copied into the IDCT
 * permutation table when FF_SIMPLE_IDCT_PERM is selected. */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
120
0e956ba2
AS
/* Column order within one row, used to build the FF_SSE2_IDCT_PERM table. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
122
4c79b95c
AJ
123void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
124 int i;
125 int end;
126
127 st->scantable= src_scantable;
128
129 for(i=0; i<64; i++){
130 int j;
131 j = src_scantable[i];
132 st->permutated[i] = permutation[j];
b250f9c6 133#if ARCH_PPC
4c79b95c
AJ
134 st->inverse[j] = i;
135#endif
136 }
137
138 end=-1;
139 for(i=0; i<64; i++){
140 int j;
141 j = st->permutated[i];
142 if(j>end) end=j;
143 st->raster_end[i]= end;
144 }
145}
146
92fb52d9
RB
147void ff_init_scantable_permutation(uint8_t *idct_permutation,
148 int idct_permutation_type)
149{
150 int i;
151
152 switch(idct_permutation_type){
153 case FF_NO_IDCT_PERM:
154 for(i=0; i<64; i++)
155 idct_permutation[i]= i;
156 break;
157 case FF_LIBMPEG2_IDCT_PERM:
158 for(i=0; i<64; i++)
159 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
160 break;
161 case FF_SIMPLE_IDCT_PERM:
162 for(i=0; i<64; i++)
163 idct_permutation[i]= simple_mmx_permutation[i];
164 break;
165 case FF_TRANSPOSE_IDCT_PERM:
166 for(i=0; i<64; i++)
167 idct_permutation[i]= ((i&7)<<3) | (i>>3);
168 break;
169 case FF_PARTTRANS_IDCT_PERM:
170 for(i=0; i<64; i++)
171 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
172 break;
173 case FF_SSE2_IDCT_PERM:
174 for(i=0; i<64; i++)
175 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
176 break;
177 default:
178 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
179 }
180}
181
/**
 * Sum all bytes of a 16x16 block.
 *
 * @param pix       top-left byte of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return sum of the 256 bytes
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
203
0c1a9eda 204static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
205{
206 int s, i, j;
1d503957 207 uint32_t *sq = ff_squareTbl + 256;
3aa102be
MN
208
209 s = 0;
210 for (i = 0; i < 16; i++) {
bb270c08 211 for (j = 0; j < 16; j += 8) {
e463f0e9
DB
212#if 0
213 s += sq[pix[0]];
214 s += sq[pix[1]];
215 s += sq[pix[2]];
216 s += sq[pix[3]];
217 s += sq[pix[4]];
218 s += sq[pix[5]];
219 s += sq[pix[6]];
220 s += sq[pix[7]];
221#else
d9a9f50a 222#if HAVE_FAST_64BIT
bb270c08
DB
223 register uint64_t x=*(uint64_t*)pix;
224 s += sq[x&0xff];
225 s += sq[(x>>8)&0xff];
226 s += sq[(x>>16)&0xff];
227 s += sq[(x>>24)&0xff];
2a006cd3
FL
228 s += sq[(x>>32)&0xff];
229 s += sq[(x>>40)&0xff];
230 s += sq[(x>>48)&0xff];
231 s += sq[(x>>56)&0xff];
232#else
bb270c08
DB
233 register uint32_t x=*(uint32_t*)pix;
234 s += sq[x&0xff];
235 s += sq[(x>>8)&0xff];
236 s += sq[(x>>16)&0xff];
237 s += sq[(x>>24)&0xff];
2a006cd3
FL
238 x=*(uint32_t*)(pix+4);
239 s += sq[x&0xff];
240 s += sq[(x>>8)&0xff];
241 s += sq[(x>>16)&0xff];
242 s += sq[(x>>24)&0xff];
243#endif
e463f0e9 244#endif
bb270c08
DB
245 pix += 8;
246 }
247 pix += line_size - 16;
3aa102be
MN
248 }
249 return s;
250}
251
/**
 * Byte-swap w 32-bit words from src into dst, in ascending index order.
 * Nothing is written when w <= 0.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
3aa102be 269
381d37fd
MR
/**
 * Byte-swap len 16-bit words from src into dst.
 *
 * A non-positive len writes nothing; this matches bswap_buf() and avoids the
 * runaway loop that a plain `while (len--)` would enter for a negative length.
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    while (len-- > 0)
        *dst++ = av_bswap16(*src++);
}
275
26efc54e
MN
276static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
277{
278 int s, i;
1d503957 279 uint32_t *sq = ff_squareTbl + 256;
26efc54e
MN
280
281 s = 0;
282 for (i = 0; i < h; i++) {
283 s += sq[pix1[0] - pix2[0]];
284 s += sq[pix1[1] - pix2[1]];
285 s += sq[pix1[2] - pix2[2]];
286 s += sq[pix1[3] - pix2[3]];
287 pix1 += line_size;
288 pix2 += line_size;
289 }
290 return s;
291}
292
bb198e19 293static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
294{
295 int s, i;
1d503957 296 uint32_t *sq = ff_squareTbl + 256;
1457ab52
MN
297
298 s = 0;
bb198e19 299 for (i = 0; i < h; i++) {
1457ab52
MN
300 s += sq[pix1[0] - pix2[0]];
301 s += sq[pix1[1] - pix2[1]];
302 s += sq[pix1[2] - pix2[2]];
303 s += sq[pix1[3] - pix2[3]];
304 s += sq[pix1[4] - pix2[4]];
305 s += sq[pix1[5] - pix2[5]];
306 s += sq[pix1[6] - pix2[6]];
307 s += sq[pix1[7] - pix2[7]];
308 pix1 += line_size;
309 pix2 += line_size;
310 }
311 return s;
312}
313
bb198e19 314static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 315{
6b026927 316 int s, i;
1d503957 317 uint32_t *sq = ff_squareTbl + 256;
9c76bd48
BF
318
319 s = 0;
bb198e19 320 for (i = 0; i < h; i++) {
6b026927
FH
321 s += sq[pix1[ 0] - pix2[ 0]];
322 s += sq[pix1[ 1] - pix2[ 1]];
323 s += sq[pix1[ 2] - pix2[ 2]];
324 s += sq[pix1[ 3] - pix2[ 3]];
325 s += sq[pix1[ 4] - pix2[ 4]];
326 s += sq[pix1[ 5] - pix2[ 5]];
327 s += sq[pix1[ 6] - pix2[ 6]];
328 s += sq[pix1[ 7] - pix2[ 7]];
329 s += sq[pix1[ 8] - pix2[ 8]];
330 s += sq[pix1[ 9] - pix2[ 9]];
331 s += sq[pix1[10] - pix2[10]];
332 s += sq[pix1[11] - pix2[11]];
333 s += sq[pix1[12] - pix2[12]];
334 s += sq[pix1[13] - pix2[13]];
335 s += sq[pix1[14] - pix2[14]];
336 s += sq[pix1[15] - pix2[15]];
2a006cd3 337
6b026927
FH
338 pix1 += line_size;
339 pix2 += line_size;
9c76bd48
BF
340 }
341 return s;
342}
343
0c1a9eda 344static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
bb270c08 345 const uint8_t *s2, int stride){
9dbcbd92
MN
346 int i;
347
348 /* read the pixels */
9dbcbd92 349 for(i=0;i<8;i++) {
c13e1abd
FH
350 block[0] = s1[0] - s2[0];
351 block[1] = s1[1] - s2[1];
352 block[2] = s1[2] - s2[2];
353 block[3] = s1[3] - s2[3];
354 block[4] = s1[4] - s2[4];
355 block[5] = s1[5] - s2[5];
356 block[6] = s1[6] - s2[6];
357 block[7] = s1[7] - s2[7];
9dbcbd92
MN
358 s1 += stride;
359 s2 += stride;
c13e1abd 360 block += 8;
9dbcbd92
MN
361 }
362}
363
364
484a337c
RB
365void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
366 int line_size)
de6d9b64 367{
de6d9b64 368 int i;
55fde95e 369 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 370
de6d9b64 371 /* read the pixels */
de6d9b64 372 for(i=0;i<8;i++) {
c13e1abd
FH
373 pixels[0] = cm[block[0]];
374 pixels[1] = cm[block[1]];
375 pixels[2] = cm[block[2]];
376 pixels[3] = cm[block[3]];
377 pixels[4] = cm[block[4]];
378 pixels[5] = cm[block[5]];
379 pixels[6] = cm[block[6]];
380 pixels[7] = cm[block[7]];
381
382 pixels += line_size;
383 block += 8;
de6d9b64
FB
384 }
385}
386
178fcca8 387static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 388 int line_size)
178fcca8
MN
389{
390 int i;
55fde95e 391 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 392
178fcca8
MN
393 /* read the pixels */
394 for(i=0;i<4;i++) {
395 pixels[0] = cm[block[0]];
396 pixels[1] = cm[block[1]];
397 pixels[2] = cm[block[2]];
398 pixels[3] = cm[block[3]];
399
400 pixels += line_size;
401 block += 8;
402 }
403}
404
9ca358b9 405static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 406 int line_size)
9ca358b9
MN
407{
408 int i;
55fde95e 409 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 410
9ca358b9
MN
411 /* read the pixels */
412 for(i=0;i<2;i++) {
413 pixels[0] = cm[block[0]];
414 pixels[1] = cm[block[1]];
415
416 pixels += line_size;
417 block += 8;
418 }
419}
420
484a337c
RB
421void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
422 uint8_t *restrict pixels,
423 int line_size)
f9ed9d85
MM
424{
425 int i, j;
426
427 for (i = 0; i < 8; i++) {
428 for (j = 0; j < 8; j++) {
429 if (*block < -128)
430 *pixels = 0;
431 else if (*block > 127)
432 *pixels = 255;
433 else
434 *pixels = (uint8_t)(*block + 128);
435 block++;
436 pixels++;
437 }
438 pixels += (line_size - 8);
439 }
440}
441
484a337c
RB
442void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
443 int line_size)
de6d9b64 444{
de6d9b64 445 int i;
55fde95e 446 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 447
de6d9b64 448 /* read the pixels */
de6d9b64 449 for(i=0;i<8;i++) {
c13e1abd
FH
450 pixels[0] = cm[pixels[0] + block[0]];
451 pixels[1] = cm[pixels[1] + block[1]];
452 pixels[2] = cm[pixels[2] + block[2]];
453 pixels[3] = cm[pixels[3] + block[3]];
454 pixels[4] = cm[pixels[4] + block[4]];
455 pixels[5] = cm[pixels[5] + block[5]];
456 pixels[6] = cm[pixels[6] + block[6]];
457 pixels[7] = cm[pixels[7] + block[7]];
458 pixels += line_size;
459 block += 8;
de6d9b64
FB
460 }
461}
178fcca8
MN
462
463static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
464 int line_size)
465{
466 int i;
55fde95e 467 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 468
178fcca8
MN
469 /* read the pixels */
470 for(i=0;i<4;i++) {
471 pixels[0] = cm[pixels[0] + block[0]];
472 pixels[1] = cm[pixels[1] + block[1]];
473 pixels[2] = cm[pixels[2] + block[2]];
474 pixels[3] = cm[pixels[3] + block[3]];
475 pixels += line_size;
476 block += 8;
477 }
478}
9ca358b9
MN
479
480static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
481 int line_size)
482{
483 int i;
55fde95e 484 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 485
9ca358b9
MN
486 /* read the pixels */
487 for(i=0;i<2;i++) {
488 pixels[0] = cm[pixels[0] + block[0]];
489 pixels[1] = cm[pixels[1] + block[1]];
490 pixels += line_size;
491 block += 8;
492 }
493}
36940eca 494
1edbfe19
LM
495static int sum_abs_dctelem_c(DCTELEM *block)
496{
497 int sum=0, i;
498 for(i=0; i<64; i++)
499 sum+= FFABS(block[i]);
500 return sum;
501}
502
342c7dfd
KS
/**
 * Set a 16-byte-wide, h-row-high area to a constant value.
 *
 * @param block     top-left byte of the area
 * @param value     byte value to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 16);
        row += line_size;
    }
}
512
/**
 * Set an 8-byte-wide, h-row-high area to a constant value.
 *
 * @param block     top-left byte of the area
 * @param value     byte value to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 8);
        row += line_size;
    }
}
522
de6d9b64
FB
/* Rounded averages of two / four values. Every argument is parenthesized so
 * that expressions with lower-precedence operators (e.g. avg2(x | 1, y))
 * are averaged as written. Arguments are evaluated exactly once. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
525
/**
 * Bilinear interpolation of an 8-pixel-wide, h-row-high area with a single
 * fractional offset given in 1/16 pixel units.
 *
 * Each output is (A*p00 + B*p01 + C*p10 + D*p11 + rounder) >> 8, where the
 * four weights add up to 256. Reads 9 pixels per row from h + 1 source rows.
 *
 * @param stride  row stride in bytes, shared by dst and src
 * @param x16     horizontal fraction, used as a weight out of 16
 * @param y16     vertical fraction, used as a weight out of 16
 * @param rounder value added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
548
/**
 * Global motion compensation of an 8-pixel-wide, h-row-high area using an
 * affine motion field.
 *
 * For every output pixel a source position is derived from (ox, oy) and the
 * per-pixel / per-row increments; it is bilinearly interpolated when both
 * coordinates are inside the picture, interpolated along one axis only when
 * the other coordinate is outside, and taken from the clipped position when
 * both are outside.
 *
 * @param stride        row stride in bytes, shared by dst and src
 * @param ox, oy        source position of the first pixel (16.16 fixed point
 *                      before the additional sub-pel shift)
 * @param dxx, dyx      increment of the position per output column
 * @param dxy, dyy      increment of the position per output row
 * @param shift         number of sub-pel fraction bits
 * @param r             rounding term added before the final shift
 * @param width, height picture size used for the bounds checks
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s = 1 << shift;
    int y;

    /* the decremented values serve both as the interpolation bound and as
     * the upper limit passed to av_clip() */
    width--;
    height--;

    for (y = 0; y < h; y++) {
        int vx = ox;
        int vy = oy;
        int x;

        for (x = 0; x < 8; x++) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & (s - 1);
            const int frac_y = src_y & (s - 1);
            int index, value;

            src_x >>= shift;
            src_y >>= shift;

            if ((unsigned)src_x < width) {
                if ((unsigned)src_y < height) {
                    /* fully inside: 2-D bilinear */
                    index = src_x + src_y * stride;
                    value = ((src[index             ] * (s - frac_x)
                            + src[index          + 1] *      frac_x) * (s - frac_y)
                           + (src[index + stride    ] * (s - frac_x)
                            + src[index + stride + 1] *      frac_x) *      frac_y
                           + r) >> (shift * 2);
                } else {
                    /* row outside: horizontal interpolation on the clipped row */
                    index = src_x + av_clip(src_y, 0, height) * stride;
                    value = ((src[index    ] * (s - frac_x)
                            + src[index + 1] *      frac_x) * s
                           + r) >> (shift * 2);
                }
            } else {
                if ((unsigned)src_y < height) {
                    /* column outside: vertical interpolation on the clipped column */
                    index = av_clip(src_x, 0, width) + src_y * stride;
                    value = ((src[index         ] * (s - frac_y)
                            + src[index + stride] *      frac_y) * s
                           + r) >> (shift * 2);
                } else {
                    /* both outside: plain copy of the clipped position */
                    index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height) * stride;
                    value = src[index];
                }
            }
            dst[y * stride + x] = value;

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
606
/**
 * Third-pel MC, position (0,0): dispatch to the put_pixels helper matching
 * the block width. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
615
/**
 * Third-pel MC, horizontal weights 2:1 (current : right neighbour).
 * 683 / 2048 approximates 1/3; the +1 is the rounding term.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + src[col + 1] + 1;
            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
626
/**
 * Third-pel MC, horizontal weights 1:2 (current : right neighbour).
 * 683 / 2048 approximates 1/3; the +1 is the rounding term.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * src[col + 1] + 1;
            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 637
669ac79c
MN
/**
 * Third-pel MC, vertical weights 2:1 (current : pixel one row below).
 * 683 / 2048 approximates 1/3; the +1 is the rounding term.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + src[col + stride] + 1;
            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 648
669ac79c
MN
/**
 * Third-pel MC, 2-D weights 4:3:3:2 (current, right, below, below-right).
 * 2731 / 32768 approximates 1/12; the +6 is the rounding term.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 4 * src[col]          + 3 * src[col + 1] +
                            3 * src[col + stride] + 2 * src[col + stride + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
659
/**
 * Third-pel MC, 2-D weights 3:2:4:3 (current, right, below, below-right).
 * 2731 / 32768 approximates 1/12; the +6 is the rounding term.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 3 * src[col]          + 2 * src[col + 1] +
                            4 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
670
/**
 * Third-pel MC, vertical weights 1:2 (current : pixel one row below).
 * 683 / 2048 approximates 1/3; the +1 is the rounding term.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * src[col + stride] + 1;
            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
681
/**
 * Third-pel MC, 2-D weights 3:4:2:3 (current, right, below, below-right).
 * 2731 / 32768 approximates 1/12; the +6 is the rounding term.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 3 * src[col]          + 4 * src[col + 1] +
                            2 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
692
/**
 * Third-pel MC, 2-D weights 2:3:3:4 (current, right, below, below-right).
 * 2731 / 32768 approximates 1/12; the +6 is the rounding term.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col]          + 3 * src[col + 1] +
                            3 * src[col + stride] + 4 * src[col + stride + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
da3b9756
MM
703
/**
 * Averaging third-pel MC, position (0,0): dispatch to the avg_pixels helper
 * matching the block width. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
712
/**
 * Averaging third-pel MC, horizontal weights 2:1 (current : right neighbour).
 * The interpolated value (683 / 2048 approximates 1/3) is averaged, rounding
 * up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
723
/**
 * Averaging third-pel MC, horizontal weights 1:2 (current : right neighbour).
 * The interpolated value (683 / 2048 approximates 1/3) is averaged, rounding
 * up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 734
da3b9756
MM
/**
 * Averaging third-pel MC, vertical weights 2:1 (current : pixel one row below).
 * The interpolated value (683 / 2048 approximates 1/3) is averaged, rounding
 * up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 745
da3b9756
MM
/**
 * Averaging third-pel MC, 2-D weights 4:3:3:2 (current, right, below,
 * below-right). The interpolated value (2731 / 32768 approximates 1/12) is
 * averaged, rounding up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum  = 4 * src[col]          + 3 * src[col + 1] +
                             3 * src[col + stride] + 2 * src[col + stride + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
756
/**
 * Averaging third-pel MC, 2-D weights 3:2:4:3 (current, right, below,
 * below-right). The interpolated value (2731 / 32768 approximates 1/12) is
 * averaged, rounding up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum  = 3 * src[col]          + 2 * src[col + 1] +
                             4 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
767
/**
 * Averaging third-pel MC, vertical weights 1:2 (current : pixel one row below).
 * The interpolated value (683 / 2048 approximates 1/3) is averaged, rounding
 * up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
778
/**
 * Averaging third-pel MC, 2-D weights 3:4:2:3 (current, right, below,
 * below-right). The interpolated value (2731 / 32768 approximates 1/12) is
 * averaged, rounding up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum  = 3 * src[col]          + 4 * src[col + 1] +
                             2 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
789
/**
 * Averaging third-pel MC, 2-D weights 2:3:3:4 (current, right, below,
 * below-right). The interpolated value (2731 / 32768 approximates 1/12) is
 * averaged, rounding up, with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum  = 2 * src[col]          + 3 * src[col + 1] +
                             3 * src[col + stride] + 4 * src[col + stride + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
669ac79c 800
b3184779 801#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda 802static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 803 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
804 int i;\
805 for(i=0; i<h; i++)\
806 {\
807 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
808 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
809 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
810 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
811 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
812 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
813 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
814 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
815 dst+=dstStride;\
816 src+=srcStride;\
817 }\
44eb4951
MN
818}\
819\
0c1a9eda 820static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 821 const int w=8;\
55fde95e 822 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
823 int i;\
824 for(i=0; i<w; i++)\
825 {\
826 const int src0= src[0*srcStride];\
827 const int src1= src[1*srcStride];\
828 const int src2= src[2*srcStride];\
829 const int src3= src[3*srcStride];\
830 const int src4= src[4*srcStride];\
831 const int src5= src[5*srcStride];\
832 const int src6= src[6*srcStride];\
833 const int src7= src[7*srcStride];\
834 const int src8= src[8*srcStride];\
835 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
836 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
837 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
838 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
839 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
840 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
841 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
842 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
843 dst++;\
844 src++;\
845 }\
846}\
847\
0c1a9eda 848static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 849 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 850 int i;\
826f429a 851 \
b3184779
MN
852 for(i=0; i<h; i++)\
853 {\
854 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
855 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
856 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
857 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
858 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
859 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
860 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
861 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
862 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
863 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
864 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
865 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
866 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
867 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
868 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
869 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
870 dst+=dstStride;\
871 src+=srcStride;\
872 }\
873}\
874\
0c1a9eda 875static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
55fde95e 876 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 877 int i;\
826f429a 878 const int w=16;\
b3184779
MN
879 for(i=0; i<w; i++)\
880 {\
881 const int src0= src[0*srcStride];\
882 const int src1= src[1*srcStride];\
883 const int src2= src[2*srcStride];\
884 const int src3= src[3*srcStride];\
885 const int src4= src[4*srcStride];\
886 const int src5= src[5*srcStride];\
887 const int src6= src[6*srcStride];\
888 const int src7= src[7*srcStride];\
889 const int src8= src[8*srcStride];\
890 const int src9= src[9*srcStride];\
891 const int src10= src[10*srcStride];\
892 const int src11= src[11*srcStride];\
893 const int src12= src[12*srcStride];\
894 const int src13= src[13*srcStride];\
895 const int src14= src[14*srcStride];\
896 const int src15= src[15*srcStride];\
897 const int src16= src[16*srcStride];\
898 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
899 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
900 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
901 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
902 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
903 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
904 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
905 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
906 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
907 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
908 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
909 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
910 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
911 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
912 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
913 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
914 dst++;\
915 src++;\
916 }\
917}\
918\
0c1a9eda
ZK
919static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
920 uint8_t half[64];\
b3184779 921 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
19a0729b 922 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
923}\
924\
0c1a9eda 925static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 926 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
927}\
928\
0c1a9eda
ZK
929static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
930 uint8_t half[64];\
b3184779 931 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
19a0729b 932 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
933}\
934\
0c1a9eda
ZK
935static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
936 uint8_t full[16*9];\
937 uint8_t half[64];\
b3184779 938 copy_block9(full, src, 16, stride, 9);\
db794953 939 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
19a0729b 940 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
941}\
942\
0c1a9eda
ZK
943static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
944 uint8_t full[16*9];\
b3184779 945 copy_block9(full, src, 16, stride, 9);\
db794953 946 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
947}\
948\
0c1a9eda
ZK
949static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
950 uint8_t full[16*9];\
951 uint8_t half[64];\
b3184779 952 copy_block9(full, src, 16, stride, 9);\
db794953 953 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
19a0729b 954 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 955}\
0c1a9eda
ZK
956void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
957 uint8_t full[16*9];\
958 uint8_t halfH[72];\
959 uint8_t halfV[64];\
960 uint8_t halfHV[64];\
b3184779
MN
961 copy_block9(full, src, 16, stride, 9);\
962 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
963 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
964 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 965 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 966}\
0c1a9eda
ZK
967static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
968 uint8_t full[16*9];\
969 uint8_t halfH[72];\
970 uint8_t halfHV[64];\
db794953
MN
971 copy_block9(full, src, 16, stride, 9);\
972 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 973 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953 974 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 975 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
db794953 976}\
0c1a9eda
ZK
977void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
978 uint8_t full[16*9];\
979 uint8_t halfH[72];\
980 uint8_t halfV[64];\
981 uint8_t halfHV[64];\
b3184779
MN
982 copy_block9(full, src, 16, stride, 9);\
983 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
984 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
985 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 986 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 987}\
0c1a9eda
ZK
988static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
989 uint8_t full[16*9];\
990 uint8_t halfH[72];\
991 uint8_t halfHV[64];\
db794953
MN
992 copy_block9(full, src, 16, stride, 9);\
993 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 994 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953 995 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 996 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
db794953 997}\
0c1a9eda
ZK
998void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
999 uint8_t full[16*9];\
1000 uint8_t halfH[72];\
1001 uint8_t halfV[64];\
1002 uint8_t halfHV[64];\
b3184779
MN
1003 copy_block9(full, src, 16, stride, 9);\
1004 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1005 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1006 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1007 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1008}\
0c1a9eda
ZK
1009static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1010 uint8_t full[16*9];\
1011 uint8_t halfH[72];\
1012 uint8_t halfHV[64];\
db794953
MN
1013 copy_block9(full, src, 16, stride, 9);\
1014 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1015 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953 1016 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1017 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
db794953 1018}\
0c1a9eda
ZK
1019void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1020 uint8_t full[16*9];\
1021 uint8_t halfH[72];\
1022 uint8_t halfV[64];\
1023 uint8_t halfHV[64];\
b3184779
MN
1024 copy_block9(full, src, 16, stride, 9);\
1025 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1026 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1027 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1028 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1029}\
0c1a9eda
ZK
1030static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1031 uint8_t full[16*9];\
1032 uint8_t halfH[72];\
1033 uint8_t halfHV[64];\
db794953
MN
1034 copy_block9(full, src, 16, stride, 9);\
1035 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1036 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953 1037 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1038 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
db794953 1039}\
0c1a9eda
ZK
1040static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1041 uint8_t halfH[72];\
1042 uint8_t halfHV[64];\
b3184779 1043 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1044 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1045 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1046}\
0c1a9eda
ZK
1047static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1048 uint8_t halfH[72];\
1049 uint8_t halfHV[64];\
b3184779 1050 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1051 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1052 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1053}\
0c1a9eda
ZK
1054void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1055 uint8_t full[16*9];\
1056 uint8_t halfH[72];\
1057 uint8_t halfV[64];\
1058 uint8_t halfHV[64];\
b3184779
MN
1059 copy_block9(full, src, 16, stride, 9);\
1060 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1061 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1062 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1063 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1064}\
0c1a9eda
ZK
1065static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1066 uint8_t full[16*9];\
1067 uint8_t halfH[72];\
db794953
MN
1068 copy_block9(full, src, 16, stride, 9);\
1069 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1070 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953
MN
1071 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1072}\
0c1a9eda
ZK
1073void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1074 uint8_t full[16*9];\
1075 uint8_t halfH[72];\
1076 uint8_t halfV[64];\
1077 uint8_t halfHV[64];\
b3184779
MN
1078 copy_block9(full, src, 16, stride, 9);\
1079 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1080 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1081 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1082 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1083}\
0c1a9eda
ZK
1084static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1085 uint8_t full[16*9];\
1086 uint8_t halfH[72];\
db794953
MN
1087 copy_block9(full, src, 16, stride, 9);\
1088 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1089 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953
MN
1090 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1091}\
0c1a9eda
ZK
1092static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1093 uint8_t halfH[72];\
b3184779 1094 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1095 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1096}\
b3184779 1097\
0c1a9eda
ZK
1098static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1099 uint8_t half[256];\
b3184779 1100 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
19a0729b 1101 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
b3184779
MN
1102}\
1103\
0c1a9eda 1104static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1105 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1106}\
b3184779 1107\
0c1a9eda
ZK
1108static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1109 uint8_t half[256];\
b3184779 1110 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
19a0729b 1111 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
b3184779
MN
1112}\
1113\
0c1a9eda
ZK
1114static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1115 uint8_t full[24*17];\
1116 uint8_t half[256];\
b3184779 1117 copy_block17(full, src, 24, stride, 17);\
826f429a 1118 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
19a0729b 1119 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
b3184779
MN
1120}\
1121\
0c1a9eda
ZK
1122static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1123 uint8_t full[24*17];\
b3184779 1124 copy_block17(full, src, 24, stride, 17);\
826f429a 1125 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1126}\
1127\
0c1a9eda
ZK
1128static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1129 uint8_t full[24*17];\
1130 uint8_t half[256];\
b3184779 1131 copy_block17(full, src, 24, stride, 17);\
826f429a 1132 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
19a0729b 1133 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
b3184779 1134}\
0c1a9eda
ZK
1135void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1136 uint8_t full[24*17];\
1137 uint8_t halfH[272];\
1138 uint8_t halfV[256];\
1139 uint8_t halfHV[256];\
b3184779
MN
1140 copy_block17(full, src, 24, stride, 17);\
1141 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1142 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1143 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1144 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1145}\
0c1a9eda
ZK
1146static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1147 uint8_t full[24*17];\
1148 uint8_t halfH[272];\
1149 uint8_t halfHV[256];\
db794953
MN
1150 copy_block17(full, src, 24, stride, 17);\
1151 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1152 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953 1153 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1154 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
db794953 1155}\
0c1a9eda
ZK
1156void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1157 uint8_t full[24*17];\
1158 uint8_t halfH[272];\
1159 uint8_t halfV[256];\
1160 uint8_t halfHV[256];\
b3184779
MN
1161 copy_block17(full, src, 24, stride, 17);\
1162 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1163 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1164 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1165 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1166}\
0c1a9eda
ZK
1167static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1168 uint8_t full[24*17];\
1169 uint8_t halfH[272];\
1170 uint8_t halfHV[256];\
db794953
MN
1171 copy_block17(full, src, 24, stride, 17);\
1172 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1173 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953 1174 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1175 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
db794953 1176}\
0c1a9eda
ZK
1177void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1178 uint8_t full[24*17];\
1179 uint8_t halfH[272];\
1180 uint8_t halfV[256];\
1181 uint8_t halfHV[256];\
b3184779
MN
1182 copy_block17(full, src, 24, stride, 17);\
1183 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1184 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1185 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1186 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1187}\
0c1a9eda
ZK
1188static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1189 uint8_t full[24*17];\
1190 uint8_t halfH[272];\
1191 uint8_t halfHV[256];\
db794953
MN
1192 copy_block17(full, src, 24, stride, 17);\
1193 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1194 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953 1195 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1196 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
db794953 1197}\
0c1a9eda
ZK
1198void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1199 uint8_t full[24*17];\
1200 uint8_t halfH[272];\
1201 uint8_t halfV[256];\
1202 uint8_t halfHV[256];\
b3184779
MN
1203 copy_block17(full, src, 24, stride, 17);\
1204 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1205 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1206 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1207 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1208}\
0c1a9eda
ZK
1209static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1210 uint8_t full[24*17];\
1211 uint8_t halfH[272];\
1212 uint8_t halfHV[256];\
db794953
MN
1213 copy_block17(full, src, 24, stride, 17);\
1214 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1215 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953 1216 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1217 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
db794953 1218}\
0c1a9eda
ZK
1219static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1220 uint8_t halfH[272];\
1221 uint8_t halfHV[256];\
b3184779 1222 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1223 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1224 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
b3184779 1225}\
0c1a9eda
ZK
1226static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1227 uint8_t halfH[272];\
1228 uint8_t halfHV[256];\
b3184779 1229 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1230 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1231 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
b3184779 1232}\
0c1a9eda
ZK
1233void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1234 uint8_t full[24*17];\
1235 uint8_t halfH[272];\
1236 uint8_t halfV[256];\
1237 uint8_t halfHV[256];\
b3184779
MN
1238 copy_block17(full, src, 24, stride, 17);\
1239 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1240 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1241 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1242 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
b3184779 1243}\
0c1a9eda
ZK
1244static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1245 uint8_t full[24*17];\
1246 uint8_t halfH[272];\
db794953
MN
1247 copy_block17(full, src, 24, stride, 17);\
1248 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1249 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953
MN
1250 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1251}\
0c1a9eda
ZK
1252void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1253 uint8_t full[24*17];\
1254 uint8_t halfH[272];\
1255 uint8_t halfV[256];\
1256 uint8_t halfHV[256];\
b3184779
MN
1257 copy_block17(full, src, 24, stride, 17);\
1258 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1259 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1260 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1261 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
b3184779 1262}\
0c1a9eda
ZK
1263static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1264 uint8_t full[24*17];\
1265 uint8_t halfH[272];\
db794953
MN
1266 copy_block17(full, src, 24, stride, 17);\
1267 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1268 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953
MN
1269 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1270}\
0c1a9eda
ZK
1271static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1272 uint8_t halfH[272];\
b3184779 1273 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1274 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1275}
44eb4951 1276
b3184779
MN
1277#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1278#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1279#define op_put(a, b) a = cm[((b) + 16)>>5]
1280#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1281
1282QPEL_MC(0, put_ , _ , op_put)
1283QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1284QPEL_MC(0, avg_ , _ , op_avg)
1285//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1286#undef op_avg
1287#undef op_avg_no_rnd
1288#undef op_put
1289#undef op_put_no_rnd
44eb4951 1290
3d1b1caa
MR
/*
 * The mc00 entries are not generated by QPEL_MC; they are aliased to block
 * routines that are defined elsewhere.
 * NOTE(review): only the last alias targets a name with the "_8" bit-depth
 * suffix -- confirm that both spellings exist and are intended.
 */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
3d1b1caa 1297
1457ab52 1298static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
55fde95e 1299 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1300 int i;
1301
1302 for(i=0; i<h; i++){
1303 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1304 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1305 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1306 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1307 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1308 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1309 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1310 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1311 dst+=dstStride;
115329f1 1312 src+=srcStride;
1457ab52
MN
1313 }
1314}
1315
#if CONFIG_RV40_DECODER
/*
 * RV40 mc33 entry points: each one forwards to the matching xy2 pixel
 * routine with the block height fixed to the block size.
 */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}

void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}

void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}

void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1330
1457ab52 1331static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
55fde95e 1332 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1333 int i;
1334
1335 for(i=0; i<w; i++){
1336 const int src_1= src[ -srcStride];
1337 const int src0 = src[0 ];
1338 const int src1 = src[ srcStride];
1339 const int src2 = src[2*srcStride];
1340 const int src3 = src[3*srcStride];
1341 const int src4 = src[4*srcStride];
1342 const int src5 = src[5*srcStride];
1343 const int src6 = src[6*srcStride];
1344 const int src7 = src[7*srcStride];
1345 const int src8 = src[8*srcStride];
1346 const int src9 = src[9*srcStride];
1347 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1348 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1349 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1350 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1351 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1352 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1353 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1354 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1355 src++;
1356 dst++;
1357 }
1358}
1359
1457ab52
MN
/**
 * 8x8 mspel position mc10: the horizontally filtered block is combined with
 * the unfiltered source by put_pixels8_l2_8 (presumably a rounded average).
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t filtered[64];   /* 8x8 scratch, stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, filtered, stride, stride, 8, 8);
}
1365
/**
 * 8x8 mspel position mc20: horizontal lowpass written straight to dst.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1369
/**
 * 8x8 mspel position mc30: the horizontally filtered block is combined with
 * the source shifted one pixel to the right by put_pixels8_l2_8 (presumably
 * a rounded average).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t filtered[64];   /* 8x8 scratch, stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, filtered, stride, stride, 8, 8);
}
1375
/**
 * 8x8 mspel position mc02: vertical lowpass written straight to dst.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1379
/**
 * 8x8 mspel position mc12: combines the vertically filtered source with the
 * horizontally-then-vertically filtered source via put_pixels8_l2_8
 * (presumably a rounded average).
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];    /* 11 rows x 8, starting one row above src */
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    /* halfH+8 skips the extra top row so the vertical taps stay inside halfH */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * 8x8 mspel position mc32: like mc12, except that the vertically filtered
 * term is taken from the source shifted one pixel to the right.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];    /* 11 rows x 8, starting one row above src */
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    /* halfH+8 skips the extra top row so the vertical taps stay inside halfH */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * 8x8 mspel position mc22: horizontal lowpass into a scratch buffer followed
 * by a vertical lowpass of that buffer into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];    /* 11 rows x 8, starting one row above src */

    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    /* halfH+8 skips the extra top row so the vertical taps stay inside halfH */
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1403
332f9ac4 1404static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1405 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1406 int x;
1407 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1408
332f9ac4
MN
1409 for(x=0; x<8; x++){
1410 int d1, d2, ad1;
1411 int p0= src[x-2*stride];
1412 int p1= src[x-1*stride];
1413 int p2= src[x+0*stride];
1414 int p3= src[x+1*stride];
1415 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1416
1417 if (d<-2*strength) d1= 0;
1418 else if(d<- strength) d1=-2*strength - d;
1419 else if(d< strength) d1= d;
1420 else if(d< 2*strength) d1= 2*strength - d;
1421 else d1= 0;
115329f1 1422
332f9ac4
MN
1423 p1 += d1;
1424 p2 -= d1;
1425 if(p1&256) p1= ~(p1>>31);
1426 if(p2&256) p2= ~(p2>>31);
115329f1 1427
332f9ac4
MN
1428 src[x-1*stride] = p1;
1429 src[x+0*stride] = p2;
1430
c26abfa5 1431 ad1= FFABS(d1)>>1;
115329f1 1432
f66e4f5f 1433 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1434
332f9ac4
MN
1435 src[x-2*stride] = p0 - d2;
1436 src[x+ stride] = p3 + d2;
1437 }
73f51a4d 1438 }
332f9ac4
MN
1439}
1440
1441static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1442 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1443 int y;
1444 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1445
332f9ac4
MN
1446 for(y=0; y<8; y++){
1447 int d1, d2, ad1;
1448 int p0= src[y*stride-2];
1449 int p1= src[y*stride-1];
1450 int p2= src[y*stride+0];
1451 int p3= src[y*stride+1];
1452 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1453
1454 if (d<-2*strength) d1= 0;
1455 else if(d<- strength) d1=-2*strength - d;
1456 else if(d< strength) d1= d;
1457 else if(d< 2*strength) d1= 2*strength - d;
1458 else d1= 0;
115329f1 1459
332f9ac4
MN
1460 p1 += d1;
1461 p2 -= d1;
1462 if(p1&256) p1= ~(p1>>31);
1463 if(p2&256) p2= ~(p2>>31);
115329f1 1464
332f9ac4
MN
1465 src[y*stride-1] = p1;
1466 src[y*stride+0] = p2;
1467
c26abfa5 1468 ad1= FFABS(d1)>>1;
115329f1 1469
f66e4f5f 1470 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1471
332f9ac4
MN
1472 src[y*stride-2] = p0 - d2;
1473 src[y*stride+1] = p3 + d2;
1474 }
73f51a4d 1475 }
332f9ac4 1476}
1457ab52 1477
fdbbf2e0
MN
/**
 * In-place separable (1, 2, 1) smoothing of an 8x8 block.
 *
 * The vertical pass stores unnormalised sums (scale 4) in a scratch buffer;
 * the first and last rows are not filtered vertically and are only scaled by
 * 4. The horizontal pass then filters the interior columns (total scale 16)
 * while the first and last columns are only normalised back from scale 4.
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride byte distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int x, y;

    /* vertical pass: border rows are passed through at scale 4 */
    for (x = 0; x < 8; x++) {
        temp[      x] = 4*src[           x];
        temp[7*8 + x] = 4*src[7*stride + x];
    }
    for (y = 1; y < 7; y++) {
        const uint8_t *in = src + y*stride;

        for (x = 0; x < 8; x++)
            temp[y*8 + x] = in[x - stride] + 2*in[x] + in[x + stride];
    }

    /* horizontal pass: border columns only undo the vertical scale */
    for (y = 0; y < 8; y++) {
        uint8_t   *out = src  + y*stride;
        const int *t   = temp + y*8;

        out[0] = (t[0] + 2)>>2;
        out[7] = (t[7] + 2)>>2;
        for (x = 1; x < 7; x++)
            out[x] = (t[x-1] + 2*t[x] + t[x+1] + 8)>>4;
    }
}
1504
/**
 * Sum of absolute differences between two 16-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference over 16*h pixels
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1532
/**
 * Sum of absolute differences between a 16-pixel-wide block and the avg2()
 * of each reference pixel with its right-hand neighbour.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; 17 bytes are read per row
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference over 16*h pixels
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1560
/**
 * Sum of absolute differences between a 16-pixel-wide block and the avg2()
 * of each reference pixel with the pixel one row below it.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; h+1 rows are read
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference over 16*h pixels
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;   /* reference row under the current one */
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
1590
/**
 * Sum of absolute differences between a 16-pixel-wide block and the avg4()
 * of each 2x2 neighbourhood of the reference (pixel, right, below and
 * below-right).
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; 17 bytes per row over h+1 rows are read
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference over 16*h pixels
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;   /* reference row under the current one */
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
1620
/**
 * Sum of absolute differences between two 8-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference over 8*h pixels
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1640
/**
 * Sum of absolute differences between an 8-pixel-wide block and the avg2()
 * of each reference pixel with its right-hand neighbour.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; 9 bytes are read per row
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference over 8*h pixels
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1660
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * interpolated vertically: each reference sample is avg2() of a pixel and
 * the pixel one row below it, so h+1 reference rows are read.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size offset in bytes between consecutive rows
 * @param h         number of rows to compare
 * @return accumulated absolute difference
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1682
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * interpolated in both directions: each reference sample is avg4() of a
 * 2x2 neighbourhood, so 9 columns and h+1 rows of the reference are read.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size offset in bytes between consecutive rows
 * @param h         number of rows to compare
 * @return accumulated absolute difference
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1704
bf4e3bd2
MR
1705static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1706 MpegEncContext *c = v;
e6a2ac34
MN
1707 int score1=0;
1708 int score2=0;
1709 int x,y;
d4c5d2ad 1710
e6a2ac34
MN
1711 for(y=0; y<h; y++){
1712 for(x=0; x<16; x++){
1713 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1714 }
1715 if(y+1<h){
1716 for(x=0; x<15; x++){
c26abfa5 1717 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1718 - s1[x+1] + s1[x+1+stride])
c26abfa5 1719 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1720 - s2[x+1] + s2[x+1+stride]);
1721 }
1722 }
1723 s1+= stride;
1724 s2+= stride;
1725 }
d4c5d2ad 1726
c26abfa5
DB
1727 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1728 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1729}
1730
bf4e3bd2
MR
1731static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1732 MpegEncContext *c = v;
e6a2ac34
MN
1733 int score1=0;
1734 int score2=0;
1735 int x,y;
115329f1 1736
e6a2ac34
MN
1737 for(y=0; y<h; y++){
1738 for(x=0; x<8; x++){
1739 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1740 }
1741 if(y+1<h){
1742 for(x=0; x<7; x++){
c26abfa5 1743 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1744 - s1[x+1] + s1[x+1+stride])
c26abfa5 1745 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1746 - s2[x+1] + s2[x+1+stride]);
1747 }
1748 }
1749 s1+= stride;
1750 s2+= stride;
1751 }
115329f1 1752
c26abfa5
DB
1753 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1754 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1755}
1756
364a1797
MN
1757static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1758 int i;
1759 unsigned int sum=0;
1760
1761 for(i=0; i<8*8; i++){
1762 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1763 int w= weight[i];
1764 b>>= RECON_SHIFT;
1765 assert(-512<b && b<512);
1766
1767 sum += (w*b)*(w*b)>>4;
1768 }
1769 return sum>>2;
1770}
1771
1772static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1773 int i;
1774
1775 for(i=0; i<8*8; i++){
1776 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115329f1 1777 }
364a1797
MN
1778}
1779
a9badb51 1780/**
58c42af7 1781 * Permute an 8x8 block.
2a5700de 1782 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
1783 * @param permutation the permutation vector
1784 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
115329f1 1785 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2a5700de 1786 * (inverse) permutated to scantable order!
a9badb51 1787 */
0c1a9eda 1788void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 1789{
7801d21d 1790 int i;
477ab036 1791 DCTELEM temp[64];
115329f1 1792
7801d21d 1793 if(last<=0) return;
90b5b51e 1794 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
d962f6fd 1795
7801d21d
MN
1796 for(i=0; i<=last; i++){
1797 const int j= scantable[i];
1798 temp[j]= block[j];
1799 block[j]=0;
1800 }
115329f1 1801
7801d21d
MN
1802 for(i=0; i<=last; i++){
1803 const int j= scantable[i];
1804 const int perm_j= permutation[j];
1805 block[perm_j]= temp[j];
1806 }
d962f6fd 1807}
e0eac44e 1808
622348f9
MN
/**
 * Comparison function that ignores all of its arguments and always reports
 * a score of 0; installed by ff_set_cmp() for FF_CMP_ZERO.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
1812
1813void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1814 int i;
115329f1 1815
3899eb2f 1816 memset(cmp, 0, sizeof(void*)*6);
115329f1 1817
3899eb2f 1818 for(i=0; i<6; i++){
622348f9
MN
1819 switch(type&0xFF){
1820 case FF_CMP_SAD:
1821 cmp[i]= c->sad[i];
1822 break;
1823 case FF_CMP_SATD:
1824 cmp[i]= c->hadamard8_diff[i];
1825 break;
1826 case FF_CMP_SSE:
1827 cmp[i]= c->sse[i];
1828 break;
1829 case FF_CMP_DCT:
1830 cmp[i]= c->dct_sad[i];
1831 break;
27c61ac5
MN
1832 case FF_CMP_DCT264:
1833 cmp[i]= c->dct264_sad[i];
1834 break;
0fd6aea1
MN
1835 case FF_CMP_DCTMAX:
1836 cmp[i]= c->dct_max[i];
1837 break;
622348f9
MN
1838 case FF_CMP_PSNR:
1839 cmp[i]= c->quant_psnr[i];
1840 break;
1841 case FF_CMP_BIT:
1842 cmp[i]= c->bit[i];
1843 break;
1844 case FF_CMP_RD:
1845 cmp[i]= c->rd[i];
1846 break;
1847 case FF_CMP_VSAD:
1848 cmp[i]= c->vsad[i];
1849 break;
1850 case FF_CMP_VSSE:
1851 cmp[i]= c->vsse[i];
1852 break;
1853 case FF_CMP_ZERO:
1854 cmp[i]= zero_cmp;
1855 break;
e6a2ac34
MN
1856 case FF_CMP_NSSE:
1857 cmp[i]= c->nsse[i];
1858 break;
05aec7bb 1859#if CONFIG_DWT
26efc54e
MN
1860 case FF_CMP_W53:
1861 cmp[i]= c->w53[i];
1862 break;
1863 case FF_CMP_W97:
1864 cmp[i]= c->w97[i];
1865 break;
3a6fc8fa 1866#endif
622348f9
MN
1867 default:
1868 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1869 }
1870 }
1871}
1872
11f18faf 1873static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
469bd7b1
LM
1874 long i;
1875 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1876 long a = *(long*)(src+i);
1877 long b = *(long*)(dst+i);
1878 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
11f18faf
MN
1879 }
1880 for(; i<w; i++)
1881 dst[i+0] += src[i+0];
1882}
1883
1884static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
469bd7b1 1885 long i;
b250f9c6 1886#if !HAVE_FAST_UNALIGNED
469bd7b1 1887 if((long)src2 & (sizeof(long)-1)){
31304587
LM
1888 for(i=0; i+7<w; i+=8){
1889 dst[i+0] = src1[i+0]-src2[i+0];
1890 dst[i+1] = src1[i+1]-src2[i+1];
1891 dst[i+2] = src1[i+2]-src2[i+2];
1892 dst[i+3] = src1[i+3]-src2[i+3];
1893 dst[i+4] = src1[i+4]-src2[i+4];
1894 dst[i+5] = src1[i+5]-src2[i+5];
1895 dst[i+6] = src1[i+6]-src2[i+6];
1896 dst[i+7] = src1[i+7]-src2[i+7];
1897 }
469bd7b1
LM
1898 }else
1899#endif
1900 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1901 long a = *(long*)(src1+i);
1902 long b = *(long*)(src2+i);
1903 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1904 }
11f18faf
MN
1905 for(; i<w; i++)
1906 dst[i+0] = src1[i+0]-src2[i+0];
1907}
1908
/**
 * Reconstruct a row from residuals using median prediction.
 *
 * Each output byte is mid_pred(left, top, (left + top - topleft) & 0xFF)
 * plus the residual, truncated to 8 bits.
 *
 * @param dst      reconstructed row
 * @param src1     row above
 * @param diff     residuals
 * @param w        number of bytes
 * @param left     in: value left of the first byte; out: last reconstructed byte
 * @param left_top in: value above-left of the first byte; out: last byte of src1 that was read
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur  = *left;
    uint8_t diag = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int top  = src1[i];
        const int grad = (cur + top - diag) & 0xFF;

        cur    = mid_pred(cur, top, grad) + diff[i];
        diag   = top;
        dst[i] = cur;
    }

    *left     = cur;
    *left_top = diag;
}
1925
/**
 * Compute residuals of a row against its median prediction.
 *
 * Each output byte is the source byte minus
 * mid_pred(left, top, (left + top - topleft) & 0xFF), truncated to 8 bits.
 *
 * @param dst      residuals
 * @param src1     row above
 * @param src2     row being predicted
 * @param w        number of bytes
 * @param left     in: value left of the first byte; out: last byte of src2 that was read
 * @param left_top in: value above-left of the first byte; out: last byte of src1 that was read
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur  = *left;
    uint8_t diag = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int top  = src1[i];
        const int pred = mid_pred(cur, top, (cur + top - diag) & 0xFF);

        diag   = top;
        cur    = src2[i];
        dst[i] = cur - pred;
    }

    *left     = cur;
    *left_top = diag;
}
1943
/**
 * Running sum (left prediction): dst[i] receives the low 8 bits of
 * acc + src[0] + ... + src[i].
 *
 * @param dst output bytes
 * @param src residuals to accumulate
 * @param w   number of bytes
 * @param acc initial accumulator value
 * @return the accumulator after the last byte (not truncated to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int pos = 0;

    while (pos < w) {
        acc     += src[pos];
        dst[pos] = acc;
        pos++;
    }

    return acc;
}
1962
/* byte offsets of the channels inside one 4-byte pixel */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-channel running sum (left prediction) over 4-byte pixels.
 *
 * @param dst   output pixels, 4 bytes each
 * @param src   residual pixels, 4 bytes each
 * @param w     number of pixels
 * @param red   in: initial accumulator; out: accumulator after the last pixel
 * @param green same, for the green channel
 * @param blue  same, for the blue channel
 * @param alpha same, for the alpha channel
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int r = *red;
    int g = *green;
    int b = *blue;
    int a = *alpha;
    int i;

    for (i = 0; i < w; i++) {
        const uint8_t *in  = src + 4*i;
        uint8_t       *out = dst + 4*i;

        /* read all four channels before writing any of them */
        b += in[B];
        g += in[G];
        r += in[R];
        a += in[A];

        out[B] = b;
        out[G] = g;
        out[R] = r;
        out[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
73c6f598 2003
1457ab52
MN
/**
 * Butterfly with separate outputs: o1 = i1 + i2, o2 = i1 - i2.
 * Wrapped in do/while(0) so that it behaves as a single statement, e.g.
 * under an unbraced if. The inputs are evaluated twice.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
} while (0)

/**
 * In-place butterfly: (x, y) becomes (x + y, x - y).
 * The temporaries carry a prefix so they cannot capture identifiers used in
 * the arguments.
 */
#define BUTTERFLY1(x,y) \
do {\
    const int bf_a= (x);\
    const int bf_b= (y);\
    (x)= bf_a+bf_b;\
    (y)= bf_a-bf_b;\
} while (0)

/** Final butterfly stage folded into a sum of magnitudes: |x + y| + |x - y|. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1457ab52 2018
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * the difference src - dst.
 *
 * @param s      unused
 * @param dst    block subtracted from src
 * @param src    source block
 * @param stride offset in bytes between consecutive rows
 * @param h      must be 8
 * @return sum of the absolute transformed differences
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal pass: three butterfly stages on every row of differences */
    for (i = 0; i < 8; i++) {
        //FIXME try pointer walks
        const uint8_t *in  = src + stride*i;
        const uint8_t *ref = dst + stride*i;
        int *row = temp + 8*i;

        BUTTERFLY2(row[0], row[1], in[0]-ref[0], in[1]-ref[1]);
        BUTTERFLY2(row[2], row[3], in[2]-ref[2], in[3]-ref[3]);
        BUTTERFLY2(row[4], row[5], in[4]-ref[4], in[5]-ref[5]);
        BUTTERFLY2(row[6], row[7], in[6]-ref[6], in[7]-ref[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: two stages in place, the third folded into the sum */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }
    return sum;
}
2063
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * a single block, with the magnitude of the DC term removed.
 *
 * @param s      unused
 * @param src    source block
 * @param dummy  unused
 * @param stride offset in bytes between consecutive rows
 * @param h      must be 8
 * @return sum of the absolute transformed samples minus |DC|
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal pass: three butterfly stages on every row */
    for (i = 0; i < 8; i++) {
        //FIXME try pointer walks
        const uint8_t *in = src + stride*i;
        int *row = temp + 8*i;

        BUTTERFLY2(row[0], row[1], in[0], in[1]);
        BUTTERFLY2(row[2], row[3], in[2], in[3]);
        BUTTERFLY2(row[4], row[5], in[4], in[5]);
        BUTTERFLY2(row[6], row[7], in[6], in[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: two stages in place, the third folded into the sum */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
2111
bb198e19 2112static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2113 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2114 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
115329f1 2115
bb198e19 2116 assert(h==8);
1457ab52
MN
2117
2118 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 2119 s->dsp.fdct(temp);
1edbfe19 2120 return s->dsp.sum_abs_dctelem(temp);
1457ab52
MN
2121}
2122
#if CONFIG_GPL
/**
 * One 8-point integer transform pass. The caller defines SRC(x) to read
 * input sample x and DST(x, v) to consume output value v for position x.
 */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * Score an 8x8 difference block with the DCT8_1D integer transform.
 *
 * The first pass transforms each row in place; the second pass runs over
 * the columns and accumulates the absolute value of every output instead
 * of storing it.
 *
 * @param c      MpegEncContext whose dsp.diff_pixels is used
 * @param src1   first block
 * @param src2   second block
 * @param stride offset in bytes between consecutive rows
 * @param h      unused
 * @return sum of absolute transform coefficients
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s = c;
    DCTELEM dct[8][8];
    int sum = 0;
    int i;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* rows, in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    /* columns, accumulating magnitudes */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
2175
0fd6aea1
MN
2176static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2177 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2178 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
0fd6aea1 2179 int sum=0, i;
115329f1 2180
0fd6aea1
MN
2181 assert(h==8);
2182
2183 s->dsp.diff_pixels(temp, src1, src2, stride);
2184 s->dsp.fdct(temp);
2185
2186 for(i=0; i<64; i++)
c26abfa5 2187 sum= FFMAX(sum, FFABS(temp[i]));
115329f1 2188
0fd6aea1
MN
2189 return sum;
2190}
2191
bb198e19 2192static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2193 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2194 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2480c390 2195 DCTELEM * const bak = temp+64;
1457ab52
MN
2196 int sum=0, i;
2197
bb198e19 2198 assert(h==8);
1457ab52 2199 s->mb_intra=0;
115329f1 2200
1457ab52 2201 s->dsp.diff_pixels(temp, src1, src2, stride);
115329f1 2202
1457ab52 2203 memcpy(bak, temp, 64*sizeof(DCTELEM));
115329f1 2204
67725183 2205 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
d50635cd 2206 s->dct_unquantize_inter(s, temp, 0, s->qscale);
e7a972e1 2207 ff_simple_idct_8(temp); //FIXME
115329f1 2208
1457ab52
MN
2209 for(i=0; i<64; i++)
2210 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
115329f1 2211
1457ab52
MN
2212 return sum;
2213}
2214
bb198e19 2215static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2216 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2217 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227
MR
2218 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2219 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2220 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
e6dba5df 2221 int i, last, run, bits, level, distortion, start_i;
3a87ac94
MN
2222 const int esc_length= s->ac_esc_length;
2223 uint8_t * length;
2224 uint8_t * last_length;
115329f1 2225
bb198e19
MN
2226 assert(h==8);
2227
90d43b52
MR
2228 copy_block8(lsrc1, src1, 8, stride, 8);
2229 copy_block8(lsrc2, src2, 8, stride, 8);
3a87ac94 2230
90d43b52 2231 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
67725183
MN
2232
2233 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2234
2235 bits=0;
115329f1 2236
3a87ac94 2237 if (s->mb_intra) {
115329f1 2238 start_i = 1;
3a87ac94
MN
2239 length = s->intra_ac_vlc_length;
2240 last_length= s->intra_ac_vlc_last_length;
67725183 2241 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2242 } else {
2243 start_i = 0;
2244 length = s->inter_ac_vlc_length;
2245 last_length= s->inter_ac_vlc_last_length;
2246 }
115329f1 2247
67725183 2248 if(last>=start_i){
3a87ac94
MN
2249 run=0;
2250 for(i=start_i; i<last; i++){
2251 int j= scantable[i];
2252 level= temp[j];
115329f1 2253
3a87ac94
MN
2254 if(level){
2255 level+=64;
2256 if((level&(~127)) == 0){
2257 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2258 }else
2259 bits+= esc_length;
2260 run=0;
2261 }else
2262 run++;
2263 }
2264 i= scantable[last];
115329f1 2265
3a87ac94 2266 level= temp[i] + 64;
1d0eab1d
MN
2267
2268 assert(level - 64);
115329f1 2269
3a87ac94
MN
2270 if((level&(~127)) == 0){
2271 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2272 }else
2273 bits+= esc_length;
115329f1 2274
67725183
MN
2275 }
2276
2277 if(last>=0){
d50635cd
MN
2278 if(s->mb_intra)
2279 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2280 else
2281 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3a87ac94 2282 }
115329f1 2283
90d43b52 2284 s->dsp.idct_add(lsrc2, 8, temp);
115329f1 2285
90d43b52 2286 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
3a87ac94 2287
e6dba5df 2288 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
2289}
2290
bb198e19 2291static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2292 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2293 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227 2294 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3a87ac94
MN
2295 int i, last, run, bits, level, start_i;
2296 const int esc_length= s->ac_esc_length;
2297 uint8_t * length;
2298 uint8_t * last_length;
bb198e19
MN
2299
2300 assert(h==8);
115329f1 2301
67725183 2302 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 2303
67725183
MN
2304 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2305
2306 bits=0;
115329f1 2307
3a87ac94 2308 if (s->mb_intra) {
115329f1 2309 start_i = 1;
3a87ac94
MN
2310 length = s->intra_ac_vlc_length;
2311 last_length= s->intra_ac_vlc_last_length;
67725183 2312 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2313 } else {
2314 start_i = 0;
2315 length = s->inter_ac_vlc_length;
2316 last_length= s->inter_ac_vlc_last_length;
2317 }
115329f1 2318
67725183 2319 if(last>=start_i){
3a87ac94
MN
2320 run=0;
2321 for(i=start_i; i<last; i++){
2322 int j= scantable[i];
2323 level= temp[j];
115329f1 2324
3a87ac94
MN
2325 if(level){
2326 level+=64;
2327 if((level&(~127)) == 0){
2328 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2329 }else
2330 bits+= esc_length;
2331 run=0;
2332 }else
2333 run++;
2334 }
2335 i= scantable[last];
115329f1 2336
67725183 2337 level= temp[i] + 64;
115329f1 2338
67725183 2339 assert(level - 64);
115329f1 2340
3a87ac94
MN
2341 if((level&(~127)) == 0){
2342 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2343 }else
2344 bits+= esc_length;
2345 }
2346
2347 return bits;
2348}
2349
7fb7f636
RS
/**
 * Define vsad_intra<size>_c(): sum of absolute differences between every
 * row of a size-wide block and the row below it, over rows 0..h-2.
 * The context and dummy arguments are unused.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score = 0;                                  \
    int x, y;                                       \
                                                    \
    for (y = 1; y < h; y++, s += stride) {          \
        for (x = 0; x < size; x++)                  \
            score += FFABS(s[x] - s[x + stride]);   \
    }                                               \
                                                    \
    return score;                                   \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
622348f9
MN
2367
/**
 * Vertical SAD of the difference signal of two 16-wide blocks: for every
 * pair of adjacent rows, accumulate |(s1 - s2)[row] - (s1 - s2)[row + 1]|.
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride offset in bytes between consecutive rows
 * @param h      number of rows
 * @return accumulated score
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++, s1 += stride, s2 += stride) {
        for (x = 0; x < 16; x++)
            score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
    }

    return score;
}
2382
/** Square of the argument; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))
/**
 * Define vsse_intra<size>_c(): sum of squared differences between every
 * row of a size-wide block and the row below it, over rows 0..h-2.
 * The context and dummy arguments are unused.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score = 0;                                  \
    int x, y;                                       \
                                                    \
    for (y = 1; y < h; y++, s += stride) {          \
        for (x = 0; x < size; x++)                  \
            score += SQ(s[x] - s[x + stride]);      \
    }                                               \
                                                    \
    return score;                                   \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
622348f9
MN
2401
/**
 * Vertical SSE of the difference signal of two 16-wide blocks: for every
 * pair of adjacent rows, accumulate
 * ((s1 - s2)[row] - (s1 - s2)[row + 1])^2.
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride offset in bytes between consecutive rows
 * @param h      number of rows
 * @return accumulated score
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++, s1 += stride, s2 += stride) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            score += d * d;
        }
    }

    return score;
}
2416
a00177a9
MR
/**
 * Sum of squared differences between an int8_t and an int16_t vector.
 *
 * @param pix1 first vector
 * @param pix2 second vector
 * @param size number of elements
 * @return sum over i of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score = 0;
    int i;

    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        score += d * d;
    }
    return score;
}
2425
9fbd14ac
DB
2426WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2427WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2428WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
b250f9c6 2429#if CONFIG_GPL
9fbd14ac 2430WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
60900991 2431#endif
9fbd14ac
DB
2432WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2433WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2434WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2435WRAPPER8_16_SQ(bit8x8_c, bit16_c)
1457ab52 2436
/**
 * Element-wise product of two float vectors: dst[i] = src0[i] * src1[i].
 *
 * @param dst  output vector
 * @param src0 first factor
 * @param src1 second factor
 * @param len  number of elements
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len)
{
    int n = 0;

    while (n < len) {
        dst[n] = src0[n] * src1[n];
        n++;
    }
}
2442
/**
 * Element-wise product with the second vector read backwards:
 * dst[i] = src0[i] * src1[len - 1 - i].
 *
 * @param dst  output vector
 * @param src0 first factor
 * @param src1 second factor, traversed from its last element
 * @param len  number of elements
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = src0[n] * src1[len - 1 - n];
}
2449
/**
 * Element-wise multiply-add: dst[i] = src0[i] * src1[i] + src2[i].
 *
 * @param dst  output vector
 * @param src0 first factor
 * @param src1 second factor
 * @param src2 addend
 * @param len  number of elements
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len)
{
    int n = 0;

    while (n < len) {
        dst[n] = src0[n] * src1[n] + src2[n];
        n++;
    }
}
2455
80ba1ddb
JR
/**
 * Windowed overlap of two half-blocks into 2*len output samples.
 *
 * For k in [0, len), with m = 2*len - 1 - k:
 *   dst[k] = src0[k] * win[m] - src1[len-1-k] * win[k]
 *   dst[m] = src0[k] * win[k] + src1[len-1-k] * win[m]
 *
 * @param dst  output, 2*len elements
 * @param src0 first input, len elements
 * @param src1 second input, len elements, read backwards
 * @param win  window, 2*len elements
 * @param len  half of the output length
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int   m  = 2*len - 1 - k;
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        const float wi = win[k];
        const float wj = win[m];

        dst[k] = s0*wj - s1*wi;
        dst[m] = s0*wi + s1*wj;
    }
}
2472
53b57211
MR
/**
 * Multiply a float vector by a scalar: dst[i] = src[i] * mul.
 *
 * @param dst output vector
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int n = 0;

    while (n < len) {
        dst[n] = src[n] * mul;
        n++;
    }
}
2480
fcca826a
MR
/**
 * Multiply a float vector by a scalar and accumulate:
 * dst[i] += src[i] * mul.
 *
 * @param dst accumulator vector, updated in place
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements
 */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int n = 0;

    while (n < len) {
        dst[n] += src[n] * mul;
        n++;
    }
}
2488
53b57211
MR
/**
 * In-place butterfly of two float vectors: afterwards v1 holds the
 * element-wise sum and v2 the element-wise difference (old v1 - old v2).
 *
 * @param v1  first vector, receives the sums
 * @param v2  second vector, receives the differences
 * @param len number of elements
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int n;

    for (n = 0; n < len; n++) {
        const float diff = v1[n] - v2[n];

        v1[n] += v2[n];
        v2[n]  = diff;
    }
}
2499
9d06037d
JR
/**
 * Butterfly of two float vectors with interleaved output:
 * dst[2*i] = src0[i] + src1[i], dst[2*i + 1] = src0[i] - src1[i].
 *
 * @param dst  output, 2*len elements
 * @param src0 first input
 * @param src1 second input
 * @param len  number of input elements per vector
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int n;

    for (n = 0; n < len; n++) {
        const float a = src0[n];
        const float b = src1[n];
        float *out = dst + 2*n;

        out[0] = a + b;
        out[1] = a - b;
    }
}
2511
53b57211
MR
/**
 * Dot product of two float vectors, accumulated in a float in index order.
 *
 * @param v1  first vector
 * @param v2  second vector
 * @param len number of elements
 * @return sum over i of v1[i] * v2[i]
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int n = 0;

    while (n < len) {
        acc += v1[n] * v2[n];
        n++;
    }

    return acc;
}
2522
0a68cd87
VS
/**
 * Clip one value by unsigned comparison of 32-bit patterns.
 *
 * @param a        value to clip
 * @param mini     returned when a > mini (unsigned compare)
 * @param maxi     returned when a with bit 31 flipped exceeds maxisign
 * @param maxisign maxi with bit 31 flipped
 * @return mini, maxi or a, tested in that order
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U<<31)) > maxisign)
        return maxi;
    return a;
}
2531
/**
 * Clip a float vector by operating on the 32-bit patterns of the values
 * with clipf_c_one(); vector_clipf_c() uses it when min < 0 < max.
 *
 * Elements are processed in groups of 8, so ceil(len / 8) * 8 elements are
 * read and written.
 *
 * @param dst output vector
 * @param src input vector
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    const uint32_t mini     = *(uint32_t*)min;
    const uint32_t maxi     = *(uint32_t*)max;
    const uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti          = (uint32_t*)dst;
    const uint32_t *srci    = (const uint32_t*)src;
    int i, k;

    for (i = 0; i < len; i += 8)
        for (k = 0; k < 8; k++)
            dsti[i + k] = clipf_c_one(srci[i + k], mini, maxi, maxisign);
}
/**
 * Clip every element of a float vector to [min, max].
 *
 * When the bounds have opposite signs the integer-compare variant is used,
 * otherwise av_clipf(). Elements are processed in groups of 8, so
 * ceil(len / 8) * 8 elements are read and written.
 *
 * @param dst output vector
 * @param src input vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8)
        for (k = 0; k < 8; k++)
            dst[i + k] = av_clipf(src[i + k], min, max);
}
2567
/**
 * Dot product of two int16_t vectors with every product shifted right
 * before it is accumulated.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements
 * @param shift right shift applied to each product
 * @return sum over i of (v1[i] * v2[i]) >> shift
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;

    for (; order--; v1++, v2++) {
        const int prod = *v1 * *v2;
        acc += prod >> shift;
    }

    return acc;
}
2577
/**
 * Dot product of v1 and v2 combined with an in-place update of v1:
 * each v1[i] is first used in the product and then has mul * v3[i] added.
 *
 * @param v1    first vector, updated in place
 * @param v2    second vector
 * @param v3    vector scaled by mul and added to v1
 * @param order number of elements
 * @param mul   scale applied to v3
 * @return sum over i of (old v1[i]) * v2[i]
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;

    for (; order--; v1++, v2++, v3++) {
        acc += *v1 * *v2;
        *v1 += mul * *v3;
    }
    return acc;
}
2587
e6e98234
JR
/**
 * Apply a symmetric window to int16_t samples.
 *
 * Only the first len/2 window coefficients are read; coefficient i is
 * applied to both input[i] and input[len-1-i]. Products are rounded by
 * adding 1 << 14 and shifting right by 15.
 *
 * @param output windowed samples
 * @param input  input samples
 * @param window first half of the window
 * @param len    number of samples
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int i;

    for (i = 0; i < half; i++) {
        const int16_t w = window[i];
        const unsigned int mirror = len - i - 1;

        output[i]      = (MUL16(input[i],      w) + (1 << 14)) >> 15;
        output[mirror] = (MUL16(input[mirror], w) + (1 << 14)) >> 15;
    }
}
2600
6054cd25
JR
/**
 * Clip every element of an int32_t vector to [min, max].
 *
 * The bulk is handled in unrolled groups of 8. A length of 0 is a no-op and
 * a length that is not a multiple of 8 is finished element by element; the
 * previous do/while form processed 8 elements for len == 0 and wrapped the
 * unsigned counter for any other non-multiple of 8.
 *
 * @param dst output vector
 * @param src input vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    for (; len >= 8; len -= 8) {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
    }
    /* tail for lengths that are not a multiple of 8 */
    for (; len > 0; len--)
        *dst++ = av_clip(*src++, min, max);
}
2616
9abc7e0f
MN
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * Row pass of the WMV2 inverse DCT, performed in place on 8 consecutive
 * coefficients.
 *
 * @param b pointer to the first coefficient of the row
 */
static void wmv2_idct_row(short * b)
{
    int a0, a1, a2, a3, a4, a5, a6, a7;
    int r1, r2;

    /* step 1: rotations; every input is read before anything is stored */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    /* step 2: combine the odd terms, scaled by 181/256 with rounding */
    r1 = (181*(a1-a5+a7-a3)+128)>>8;
    r2 = (181*(a1-a5-a7+a3)+128)>>8;

    /* step 3: output with rounding and a right shift by 8 */
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +r1   + (1<<7))>>8;
    b[2] = (a4-a6 +r2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -r2   + (1<<7))>>8;
    b[6] = (a4+a6 -r1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
2652static void wmv2_idct_col(short * b)
2653{
2654 int s1,s2;
2655 int a0,a1,a2,a3,a4,a5,a6,a7;
2656 /*step 1, with extended precision*/
2657 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2658 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2659 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2660 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2661 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2662 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2663 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2664 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2665 /*step 2*/
2666 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2667 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2668 /*step 3*/
2669 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2670 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2671 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2672 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2673
2674 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2675 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2676 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2677 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
2678}
/**
 * In-place 8x8 WMV2 inverse DCT: all eight rows first, then all eight
 * columns.
 *
 * @param block 64 coefficients in row-major order
 */
void ff_wmv2_idct_c(short * block){
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8*row);

    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
b0368839
MN
2689/* XXX: those functions should be suppressed ASAP when all IDCTs are
2690 converted */
9abc7e0f
MN
2691static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2692{
2693 ff_wmv2_idct_c(block);
484a337c 2694 ff_put_pixels_clamped_c(block, dest, line_size);
9abc7e0f
MN
2695}
2696static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2697{
2698 ff_wmv2_idct_c(block);
484a337c 2699 ff_add_pixels_clamped_c(block, dest, line_size);
9abc7e0f 2700}
b0368839
MN
2701static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2702{
2703 j_rev_dct (block);
484a337c 2704 ff_put_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2705}
2706static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2707{
2708 j_rev_dct (block);
484a337c 2709 ff_add_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2710}
2711
178fcca8
MN
2712static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
2713{
2714 j_rev_dct4 (block);
2715 put_pixels_clamped4_c(block, dest, line_size);
2716}
2717static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
2718{
2719 j_rev_dct4 (block);
2720 add_pixels_clamped4_c(block, dest, line_size);
2721}
2722
9ca358b9
MN
2723static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
2724{
2725 j_rev_dct2 (block);
2726 put_pixels_clamped2_c(block, dest, line_size);
2727}
2728static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
2729{
2730 j_rev_dct2 (block);
2731 add_pixels_clamped2_c(block, dest, line_size);
2732}
2733
1aa8c57b
MN
2734static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
2735{
55fde95e 2736 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1aa8c57b
MN
2737
2738 dest[0] = cm[(block[0] + 4)>>3];
2739}
2740static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
2741{
55fde95e 2742 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1aa8c57b
MN
2743
2744 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
2745}
2746
d111e41f 2747static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
513fbd8e 2748
59cf08ce 2749/* init static data */
0752cd39 2750av_cold void dsputil_static_init(void)
e0eac44e 2751{
d2975f8d 2752 int i;
e0eac44e 2753
55fde95e 2754 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
59cf08ce 2755 for(i=0;i<MAX_NEG_CROP;i++) {
55fde95e
MR
2756 ff_cropTbl[i] = 0;
2757 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
59cf08ce 2758 }
115329f1 2759
59cf08ce 2760 for(i=0;i<512;i++) {
1d503957 2761 ff_squareTbl[i] = (i - 256) * (i - 256);
59cf08ce 2762 }
115329f1 2763
486497e0 2764 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
59cf08ce 2765}
92ddb692 2766
6dc7d5da
MN
2767int ff_check_alignment(void){
2768 static int did_fail=0;
29b42c66 2769 LOCAL_ALIGNED_16(int, aligned, [4]);
6dc7d5da 2770
29b42c66 2771 if((intptr_t)aligned & 15){
6dc7d5da 2772 if(!did_fail){
b250f9c6 2773#if HAVE_MMX || HAVE_ALTIVEC
6dc7d5da 2774 av_log(NULL, AV_LOG_ERROR,
c1173617
MR
2775 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2776 "and may be very slow or crash. This is not a bug in libavcodec,\n"
5e4c7ca2 2777 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
6001dad6 2778 "Do not report crashes to Libav developers.\n");
6dc7d5da
MN
2779#endif
2780 did_fail=1;
2781 }
2782 return -1;
2783 }
2784 return 0;
2785}
92ddb692 2786
0752cd39 2787av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
59cf08ce
FB
2788{
2789 int i;
de6d9b64 2790
6dc7d5da
MN
2791 ff_check_alignment();
2792
b250f9c6 2793#if CONFIG_ENCODERS
0a72533e
MR
2794 if (avctx->bits_per_raw_sample == 10) {
2795 c->fdct = ff_jpeg_fdct_islow_10;
2796 c->fdct248 = ff_fdct248_islow_10;
2797 } else {
2798 if(avctx->dct_algo==FF_DCT_FASTINT) {
2799 c->fdct = fdct_ifast;
2800 c->fdct248 = fdct_ifast248;
2801 }
2802 else if(avctx->dct_algo==FF_DCT_FAAN) {
2803 c->fdct = ff_faandct;
2804 c->fdct248 = ff_faandct248;
2805 }
2806 else {
2807 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2808 c->fdct248 = ff_fdct248_islow_8;
2809 }
10acc479 2810 }
b0368839
MN
2811#endif //CONFIG_ENCODERS
2812
178fcca8 2813 if(avctx->lowres==1){
7b4ee3a2
MR
2814 c->idct_put= ff_jref_idct4_put;
2815 c->idct_add= ff_jref_idct4_add;
178fcca8 2816 c->idct = j_rev_dct4;
b0368839 2817 c->idct_permutation_type= FF_NO_IDCT_PERM;
9ca358b9
MN
2818 }else if(avctx->lowres==2){
2819 c->idct_put= ff_jref_idct2_put;
2820 c->idct_add= ff_jref_idct2_add;
2821 c->idct = j_rev_dct2;
2822 c->idct_permutation_type= FF_NO_IDCT_PERM;
1aa8c57b
MN
2823 }else if(avctx->lowres==3){
2824 c->idct_put= ff_jref_idct1_put;
2825 c->idct_add= ff_jref_idct1_add;
2826 c->idct = j_rev_dct1;
2827 c->idct_permutation_type= FF_NO_IDCT_PERM;
178fcca8 2828 }else{
e7a972e1
MR
2829 if (avctx->bits_per_raw_sample == 10) {
2830 c->idct_put = ff_simple_idct_put_10;
2831 c->idct_add = ff_simple_idct_add_10;
2832 c->idct = ff_simple_idct_10;
2833 c->idct_permutation_type = FF_NO_IDCT_PERM;
2834 } else {
178fcca8
MN
2835 if(avctx->idct_algo==FF_IDCT_INT){
2836 c->idct_put= ff_jref_idct_put;
2837 c->idct_add= ff_jref_idct_add;
2838 c->idct = j_rev_dct;
2839 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
99e5a9d1 2840 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
9b5dc867 2841 avctx->idct_algo==FF_IDCT_VP3){
8b6103da
MN
2842 c->idct_put= ff_vp3_idct_put_c;
2843 c->idct_add= ff_vp3_idct_add_c;
2844 c->idct = ff_vp3_idct_c;
2845 c->idct_permutation_type= FF_NO_IDCT_PERM;
9abc7e0f
MN
2846 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2847 c->idct_put= ff_wmv2_idct_put_c;
2848 c->idct_add= ff_wmv2_idct_add_c;
2849 c->idct = ff_wmv2_idct_c;
2850 c->idct_permutation_type= FF_NO_IDCT_PERM;
6f08c541
MN
2851 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2852 c->idct_put= ff_faanidct_put;
2853 c->idct_add= ff_faanidct_add;
2854 c->idct = ff_faanidct;
2855 c->idct_permutation_type= FF_NO_IDCT_PERM;
49fb20cb 2856 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
28245435
PR
2857 c->idct_put= ff_ea_idct_put_c;
2858 c->idct_permutation_type= FF_NO_IDCT_PERM;
178fcca8 2859 }else{ //accurate/default
e7a972e1
MR
2860 c->idct_put = ff_simple_idct_put_8;
2861 c->idct_add = ff_simple_idct_add_8;
2862 c->idct = ff_simple_idct_8;
178fcca8
MN
2863 c->idct_permutation_type= FF_NO_IDCT_PERM;
2864 }
e7a972e1 2865 }
b0368839
MN
2866 }
2867
eb4b3dd3 2868 c->diff_pixels = diff_pixels_c;
484a337c
RB
2869 c->put_pixels_clamped = ff_put_pixels_clamped_c;
2870 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
484a337c 2871 c->add_pixels_clamped = ff_add_pixels_clamped_c;
1edbfe19 2872 c->sum_abs_dctelem = sum_abs_dctelem_c;
eb4b3dd3 2873 c->gmc1 = gmc1_c;
703c8195 2874 c->gmc = ff_gmc_c;
eb4b3dd3
ZK
2875 c->pix_sum = pix_sum_c;
2876 c->pix_norm1 = pix_norm1_c;
2877
342c7dfd
KS
2878 c->fill_block_tab[0] = fill_block16_c;
2879 c->fill_block_tab[1] = fill_block8_c;
342c7dfd 2880
45553457 2881 /* TODO [0] 16 [1] 8 */
bb198e19
MN
2882 c->pix_abs[0][0] = pix_abs16_c;
2883 c->pix_abs[0][1] = pix_abs16_x2_c;
2884 c->pix_abs[0][2] = pix_abs16_y2_c;
2885 c->pix_abs[0][3] = pix_abs16_xy2_c;
2886 c->pix_abs[1][0] = pix_abs8_c;
2887 c->pix_abs[1][1] = pix_abs8_x2_c;
2888 c->pix_abs[1][2] = pix_abs8_y2_c;
2889 c->pix_abs[1][3] = pix_abs8_xy2_c;
eb4b3dd3 2890
669ac79c
MN
2891 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2892 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2893 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2894 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2895 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2896 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2897 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2898 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2899 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2900
da3b9756
MM
2901 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2902 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2903 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2904 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2905 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2906 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2907 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2908 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2909 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2910
45553457
ZK
2911#define dspfunc(PFX, IDX, NUM) \
2912 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2913 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2914 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2915 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2916 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2917 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2918 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2919 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2920 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2921 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2922 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2923 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2924 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2925 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2926 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2927 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2928
2929 dspfunc(put_qpel, 0, 16);
2930 dspfunc(put_no_rnd_qpel, 0, 16);
2931
2932 dspfunc(avg_qpel, 0, 16);
2933 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2934
2935 dspfunc(put_qpel, 1, 8);
2936 dspfunc(put_no_rnd_qpel, 1, 8);
2937
2938 dspfunc(avg_qpel, 1, 8);
2939 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265 2940
45553457 2941#undef dspfunc
5a6a9e78 2942
bf4f19dc
RP
2943#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
2944 ff_mlp_init(c, avctx);
2945#endif
9be6f0d2 2946#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
9abc7e0f
MN
2947 ff_intrax8dsp_init(c,avctx);
2948#endif
b482e2d1 2949
3d1b1caa 2950 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
1457ab52
MN
2951 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2952 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2953 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2954 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2955 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2956 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2957 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
115329f1 2958
bb198e19
MN
2959#define SET_CMP_FUNC(name) \
2960 c->name[0]= name ## 16_c;\
2961 c->name[1]= name ## 8x8_c;
115329f1 2962
bb198e19 2963 SET_CMP_FUNC(hadamard8_diff)
622348f9 2964 c->hadamard8_diff[4]= hadamard8_intra16_c;
7fb7f636 2965 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
bb198e19 2966 SET_CMP_FUNC(dct_sad)
0fd6aea1 2967 SET_CMP_FUNC(dct_max)
b250f9c6 2968#if CONFIG_GPL
27c61ac5 2969 SET_CMP_FUNC(dct264_sad)
60900991 2970#endif
bb198e19
MN
2971 c->sad[0]= pix_abs16_c;
2972 c->sad[1]= pix_abs8_c;
2973 c->sse[0]= sse16_c;
2974 c->sse[1]= sse8_c;
26efc54e 2975 c->sse[2]= sse4_c;
bb198e19
MN
2976 SET_CMP_FUNC(quant_psnr)
2977 SET_CMP_FUNC(rd)
2978 SET_CMP_FUNC(bit)
622348f9
MN
2979 c->vsad[0]= vsad16_c;
2980 c->vsad[4]= vsad_intra16_c;
7fb7f636 2981 c->vsad[5]= vsad_intra8_c;
622348f9
MN
2982 c->vsse[0]= vsse16_c;
2983 c->vsse[4]= vsse_intra16_c;
7fb7f636 2984 c->vsse[5]= vsse_intra8_c;
e6a2ac34
MN
2985 c->nsse[0]= nsse16_c;
2986 c->nsse[1]= nsse8_c;
05aec7bb
MR
2987#if CONFIG_DWT
2988 ff_dsputil_init_dwt(c);
3a6fc8fa 2989#endif
26efc54e 2990
59006372
LM
2991 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2992
11f18faf
MN
2993 c->add_bytes= add_bytes_c;
2994 c->diff_bytes= diff_bytes_c;
3daa434a 2995 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
84705403 2996 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
73c6f598
NC
2997 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2998 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
3d2e8cce 2999 c->bswap_buf= bswap_buf;
381d37fd 3000 c->bswap16_buf = bswap16_buf;
42251a2a 3001
4052cbf1 3002 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
674eeb5f
AJ
3003 c->h263_h_loop_filter= h263_h_loop_filter_c;
3004 c->h263_v_loop_filter= h263_v_loop_filter_c;
eb75a698 3005 }
115329f1 3006
99e5a9d1 3007 if (CONFIG_VP3_DECODER) {
9971331d
DC
3008 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
3009 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
eb6a6cd7 3010 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
9971331d
DC
3011 }
3012
fdbbf2e0 3013 c->h261_loop_filter= h261_loop_filter_c;
115329f1 3014
364a1797
MN
3015 c->try_8x8basis= try_8x8basis_c;
3016 c->add_8x8basis= add_8x8basis_c;
11f18faf 3017
b250f9c6 3018#if CONFIG_VORBIS_DECODER
2dac4acf
LM
3019 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
3020#endif
b250f9c6 3021#if CONFIG_AC3_DECODER
ac2e5564
LM
3022 c->ac3_downmix = ff_ac3_downmix_c;
3023#endif
eb4825b5
LM
3024 c->vector_fmul = vector_fmul_c;
3025 c->vector_fmul_reverse = vector_fmul_reverse_c;
952e8721 3026 c->vector_fmul_add = vector_fmul_add_c;
80ba1ddb 3027 c->vector_fmul_window = vector_fmul_window_c;
0a68cd87 3028 c->vector_clipf = vector_clipf_c;
88c0536a 3029 c->scalarproduct_int16 = scalarproduct_int16_c;
b1159ad9 3030 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
e6e98234 3031 c->apply_window_int16 = apply_window_int16_c;
6054cd25 3032 c->vector_clip_int32 = vector_clip_int32_c;
53b57211
MR
3033 c->scalarproduct_float = scalarproduct_float_c;
3034 c->butterflies_float = butterflies_float_c;
9d06037d 3035 c->butterflies_float_interleave = butterflies_float_interleave_c;
53b57211 3036 c->vector_fmul_scalar = vector_fmul_scalar_c;
fcca826a 3037 c->vector_fmac_scalar = vector_fmac_scalar_c;
53b57211 3038
9686abb8 3039 c->shrink[0]= av_image_copy_plane;
54009d42
MN
3040 c->shrink[1]= ff_shrink22;
3041 c->shrink[2]= ff_shrink44;
3042 c->shrink[3]= ff_shrink88;
3043
513fbd8e
LM
3044 c->prefetch= just_return;
3045
2833fc46
LM
3046 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3047 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3048
19a0729b
OA
3049#undef FUNC
3050#undef FUNCC
3051#define FUNC(f, depth) f ## _ ## depth
3052#define FUNCC(f, depth) f ## _ ## depth ## _c
3053
3054#define dspfunc1(PFX, IDX, NUM, depth)\
3055 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
3056 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3057 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3058 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3059
3060#define dspfunc2(PFX, IDX, NUM, depth)\
3061 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3062 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3063 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3064 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3065 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3066 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3067 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3068 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3069 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3070 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3071 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3072 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3073 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3074 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3075 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3076 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3077
3078
5cc26009 3079#define BIT_DEPTH_FUNCS(depth, dct)\
874f1a90 3080 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
19a0729b
OA
3081 c->draw_edges = FUNCC(draw_edges , depth);\
3082 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
5cc26009
MR
3083 c->clear_block = FUNCC(clear_block ## dct , depth);\
3084 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3085 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3086 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
19a0729b
OA
3087 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3088 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3089\
3090 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3091 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3092 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3093 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3094 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3095 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3096\
3097 dspfunc1(put , 0, 16, depth);\
3098 dspfunc1(put , 1, 8, depth);\
3099 dspfunc1(put , 2, 4, depth);\
3100 dspfunc1(put , 3, 2, depth);\
3101 dspfunc1(put_no_rnd, 0, 16, depth);\
3102 dspfunc1(put_no_rnd, 1, 8, depth);\
3103 dspfunc1(avg , 0, 16, depth);\
3104 dspfunc1(avg , 1, 8, depth);\
3105 dspfunc1(avg , 2, 4, depth);\
3106 dspfunc1(avg , 3, 2, depth);\
3107 dspfunc1(avg_no_rnd, 0, 16, depth);\
3108 dspfunc1(avg_no_rnd, 1, 8, depth);\
3109\
3110 dspfunc2(put_h264_qpel, 0, 16, depth);\
3111 dspfunc2(put_h264_qpel, 1, 8, depth);\
3112 dspfunc2(put_h264_qpel, 2, 4, depth);\
3113 dspfunc2(put_h264_qpel, 3, 2, depth);\
3114 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3115 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3116 dspfunc2(avg_h264_qpel, 2, 4, depth);
3117
a82beafd
MR
3118 switch (avctx->bits_per_raw_sample) {
3119 case 9:
5cc26009
MR
3120 if (c->dct_bits == 32) {
3121 BIT_DEPTH_FUNCS(9, _32);
3122 } else {
3123 BIT_DEPTH_FUNCS(9, _16);
3124 }
a82beafd
MR
3125 break;
3126 case 10:
5cc26009
MR
3127 if (c->dct_bits == 32) {
3128 BIT_DEPTH_FUNCS(10, _32);
3129 } else {
3130 BIT_DEPTH_FUNCS(10, _16);
3131 }
a82beafd
MR
3132 break;
3133 default:
5cc26009 3134 BIT_DEPTH_FUNCS(8, _16);
a82beafd 3135 break;
19a0729b
OA
3136 }
3137
3138
49fb20cb
AJ
3139 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
3140 if (ARCH_ARM) dsputil_init_arm (c, avctx);
3141 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
3142 if (HAVE_VIS) dsputil_init_vis (c, avctx);
3143 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
3144 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
3145 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
3146 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
3147 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
43f1708f 3148
2833fc46
LM
3149 for(i=0; i<64; i++){
3150 if(!c->put_2tap_qpel_pixels_tab[0][i])
3151 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
3152 if(!c->avg_2tap_qpel_pixels_tab[0][i])
3153 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3154 }
3155
92fb52d9
RB
3156 ff_init_scantable_permutation(c->idct_permutation,
3157 c->idct_permutation_type);
57060b1e 3158}