Add support for higher QP values in h264.
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
406792e7 3 * Copyright (c) 2000, 2001 Fabrice Bellard
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
de6d9b64 5 *
7b94177e
DB
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7 *
2912e87a 8 * This file is part of Libav.
b78e7197 9 *
2912e87a 10 * Libav is free software; you can redistribute it and/or
ff4ec49e
FB
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
b78e7197 13 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 14 *
2912e87a 15 * Libav is distributed in the hope that it will be useful,
de6d9b64 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
de6d9b64 19 *
ff4ec49e 20 * You should have received a copy of the GNU Lesser General Public
2912e87a 21 * License along with Libav; if not, write to the Free Software
5509bffa 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 23 */
115329f1 24
983e3246 25/**
ba87f080 26 * @file
983e3246
MN
27 * DSP utils
28 */
115329f1 29
737eb597 30#include "libavutil/imgutils.h"
de6d9b64
FB
31#include "avcodec.h"
32#include "dsputil.h"
b0368839 33#include "simple_idct.h"
65e4c8c9 34#include "faandct.h"
6f08c541 35#include "faanidct.h"
199436b9 36#include "mathops.h"
af818f7a
DB
37#include "mpegvideo.h"
38#include "config.h"
3da11804
MR
39#include "ac3dec.h"
40#include "vorbis.h"
41#include "png.h"
5596c60c 42
55fde95e 43uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Lookup table read by the SSE/norm routines in this file through
 * (ff_squareTbl + 256), indexed by a pixel value or pixel difference.
 * Only zero-initialized here; NOTE(review): presumably filled with squares
 * by an init routine outside this view. */
uint32_t ff_squareTbl[512] = { 0 };
de6d9b64 45
325eefa2
OA
46#include "dsputil_template.c"
47
917f55cc
LM
/* Byte-replicated constants: 0x7f (resp. 0x80) in every byte of an
 * unsigned long, i.e. 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f depending on the
 * width of unsigned long. ~0UL/255 is 0x0101...01. */
#define pb_7f ((~0UL / 255) * 0x7f)
#define pb_80 ((~0UL / 255) * 0x80)
469bd7b1 51
/* Zigzag scan order for an 8x8 block: entry i is the raster index
 * (row * 8 + column) visited at scan position i. */
const uint8_t ff_zigzag_direct[64] = {
    /*  0.. 7 */  0,  1,  8, 16,  9,  2,  3, 10,
    /*  8..15 */ 17, 24, 32, 25, 18, 11,  4,  5,
    /* 16..23 */ 12, 19, 26, 33, 40, 48, 41, 34,
    /* 24..31 */ 27, 20, 13,  6,  7, 14, 21, 28,
    /* 32..39 */ 35, 42, 49, 56, 57, 50, 43, 36,
    /* 40..47 */ 29, 22, 15, 23, 30, 37, 44, 51,
    /* 48..55 */ 58, 59, 52, 45, 38, 31, 39, 46,
    /* 56..63 */ 53, 60, 61, 54, 47, 55, 62, 63
};
62
10acc479
RS
/* Zigzag scan used with the 248 IDCT. Unlike the specification, the two
 * fields are interleaved here. Entry i is the raster index visited at scan
 * position i. */
const uint8_t ff_zigzag248_direct[64] = {
    /*  0.. 7 */  0,  8,  1,  9, 16, 24,  2, 10,
    /*  8..15 */ 17, 25, 32, 40, 48, 56, 33, 41,
    /* 16..23 */ 18, 26,  3, 11,  4, 12, 19, 27,
    /* 24..31 */ 34, 42, 49, 57, 50, 58, 35, 43,
    /* 32..39 */ 20, 28,  5, 13,  6, 14, 21, 29,
    /* 40..47 */ 36, 44, 51, 59, 52, 60, 37, 45,
    /* 48..55 */ 22, 30,  7, 15, 23, 31, 38, 46,
    /* 56..63 */ 53, 61, 54, 62, 39, 47, 55, 63,
};
75
2f349de2 76/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84dc2d8a 77DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
2f349de2 78
/* Alternate horizontal scan order for an 8x8 block: entry i is the raster
 * index visited at scan position i. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    /*  0.. 7 */  0,  1,  2,  3,  8,  9, 16, 17,
    /*  8..15 */ 10, 11,  4,  5,  6,  7, 15, 14,
    /* 16..23 */ 13, 12, 19, 18, 24, 25, 32, 33,
    /* 24..31 */ 26, 27, 20, 21, 22, 23, 28, 29,
    /* 32..39 */ 30, 31, 34, 35, 40, 41, 48, 49,
    /* 40..47 */ 42, 43, 36, 37, 38, 39, 44, 45,
    /* 48..55 */ 46, 47, 50, 51, 56, 57, 58, 59,
    /* 56..63 */ 52, 53, 54, 55, 60, 61, 62, 63,
};
89
/* Alternate vertical scan order for an 8x8 block: entry i is the raster
 * index visited at scan position i. */
const uint8_t ff_alternate_vertical_scan[64] = {
    /*  0.. 7 */  0,  8, 16, 24,  1,  9,  2, 10,
    /*  8..15 */ 17, 25, 32, 40, 48, 56, 57, 49,
    /* 16..23 */ 41, 33, 26, 18,  3, 11,  4, 12,
    /* 24..31 */ 19, 27, 34, 42, 50, 58, 35, 43,
    /* 32..39 */ 51, 59, 20, 28,  5, 13,  6, 14,
    /* 40..47 */ 21, 29, 36, 44, 52, 60, 37, 45,
    /* 48..55 */ 53, 61, 22, 30,  7, 15, 23, 31,
    /* 56..63 */ 38, 46, 54, 62, 39, 47, 55, 63,
};
100
b0368839
MN
/* Coefficient input permutation expected by simple_idct_mmx: a 64-entry
 * permutation of the indices 0..63. */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
    0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
    0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
    0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
    0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};
112
0e956ba2
AS
/* Eight-entry index permutation (0,4,1,5,2,6,3,7); by its name, the row
 * permutation used with the SSE2 IDCT. */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5,
    2, 6, 3, 7
};
114
4c79b95c
AJ
115void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
116 int i;
117 int end;
118
119 st->scantable= src_scantable;
120
121 for(i=0; i<64; i++){
122 int j;
123 j = src_scantable[i];
124 st->permutated[i] = permutation[j];
b250f9c6 125#if ARCH_PPC
4c79b95c
AJ
126 st->inverse[j] = i;
127#endif
128 }
129
130 end=-1;
131 for(i=0; i<64; i++){
132 int j;
133 j = st->permutated[i];
134 if(j>end) end=j;
135 st->raster_end[i]= end;
136 }
137}
138
/**
 * Sum all pixel values of a 16x16 block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return the sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
160
0c1a9eda 161static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
162{
163 int s, i, j;
1d503957 164 uint32_t *sq = ff_squareTbl + 256;
3aa102be
MN
165
166 s = 0;
167 for (i = 0; i < 16; i++) {
bb270c08 168 for (j = 0; j < 16; j += 8) {
2a006cd3 169#if 0
bb270c08
DB
170 s += sq[pix[0]];
171 s += sq[pix[1]];
172 s += sq[pix[2]];
173 s += sq[pix[3]];
174 s += sq[pix[4]];
175 s += sq[pix[5]];
176 s += sq[pix[6]];
177 s += sq[pix[7]];
2a006cd3
FL
178#else
179#if LONG_MAX > 2147483647
bb270c08
DB
180 register uint64_t x=*(uint64_t*)pix;
181 s += sq[x&0xff];
182 s += sq[(x>>8)&0xff];
183 s += sq[(x>>16)&0xff];
184 s += sq[(x>>24)&0xff];
2a006cd3
FL
185 s += sq[(x>>32)&0xff];
186 s += sq[(x>>40)&0xff];
187 s += sq[(x>>48)&0xff];
188 s += sq[(x>>56)&0xff];
189#else
bb270c08
DB
190 register uint32_t x=*(uint32_t*)pix;
191 s += sq[x&0xff];
192 s += sq[(x>>8)&0xff];
193 s += sq[(x>>16)&0xff];
194 s += sq[(x>>24)&0xff];
2a006cd3
FL
195 x=*(uint32_t*)(pix+4);
196 s += sq[x&0xff];
197 s += sq[(x>>8)&0xff];
198 s += sq[(x>>16)&0xff];
199 s += sq[(x>>24)&0xff];
200#endif
201#endif
bb270c08
DB
202 pix += 8;
203 }
204 pix += line_size - 16;
3aa102be
MN
205 }
206 return s;
207}
208
/**
 * Byte-swap w 32-bit words from src into dst, in ascending index order.
 *
 * @param dst destination words
 * @param src source words
 * @param w   number of words; nothing is written when w <= 0
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
3aa102be 226
381d37fd
MR
/**
 * Byte-swap len 16-bit words from src into dst.
 *
 * @param dst destination words
 * @param src source words
 * @param len number of words to convert; the loop runs until len reaches 0
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    for (; len != 0; len--) {
        *dst = av_bswap16(*src);
        dst++;
        src++;
    }
}
232
26efc54e
MN
233static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
234{
235 int s, i;
1d503957 236 uint32_t *sq = ff_squareTbl + 256;
26efc54e
MN
237
238 s = 0;
239 for (i = 0; i < h; i++) {
240 s += sq[pix1[0] - pix2[0]];
241 s += sq[pix1[1] - pix2[1]];
242 s += sq[pix1[2] - pix2[2]];
243 s += sq[pix1[3] - pix2[3]];
244 pix1 += line_size;
245 pix2 += line_size;
246 }
247 return s;
248}
249
bb198e19 250static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
251{
252 int s, i;
1d503957 253 uint32_t *sq = ff_squareTbl + 256;
1457ab52
MN
254
255 s = 0;
bb198e19 256 for (i = 0; i < h; i++) {
1457ab52
MN
257 s += sq[pix1[0] - pix2[0]];
258 s += sq[pix1[1] - pix2[1]];
259 s += sq[pix1[2] - pix2[2]];
260 s += sq[pix1[3] - pix2[3]];
261 s += sq[pix1[4] - pix2[4]];
262 s += sq[pix1[5] - pix2[5]];
263 s += sq[pix1[6] - pix2[6]];
264 s += sq[pix1[7] - pix2[7]];
265 pix1 += line_size;
266 pix2 += line_size;
267 }
268 return s;
269}
270
bb198e19 271static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 272{
6b026927 273 int s, i;
1d503957 274 uint32_t *sq = ff_squareTbl + 256;
9c76bd48
BF
275
276 s = 0;
bb198e19 277 for (i = 0; i < h; i++) {
6b026927
FH
278 s += sq[pix1[ 0] - pix2[ 0]];
279 s += sq[pix1[ 1] - pix2[ 1]];
280 s += sq[pix1[ 2] - pix2[ 2]];
281 s += sq[pix1[ 3] - pix2[ 3]];
282 s += sq[pix1[ 4] - pix2[ 4]];
283 s += sq[pix1[ 5] - pix2[ 5]];
284 s += sq[pix1[ 6] - pix2[ 6]];
285 s += sq[pix1[ 7] - pix2[ 7]];
286 s += sq[pix1[ 8] - pix2[ 8]];
287 s += sq[pix1[ 9] - pix2[ 9]];
288 s += sq[pix1[10] - pix2[10]];
289 s += sq[pix1[11] - pix2[11]];
290 s += sq[pix1[12] - pix2[12]];
291 s += sq[pix1[13] - pix2[13]];
292 s += sq[pix1[14] - pix2[14]];
293 s += sq[pix1[15] - pix2[15]];
2a006cd3 294
6b026927
FH
295 pix1 += line_size;
296 pix2 += line_size;
9c76bd48
BF
297 }
298 return s;
299}
300
0c1a9eda 301static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 302{
de6d9b64
FB
303 int i;
304
305 /* read the pixels */
de6d9b64 306 for(i=0;i<8;i++) {
c13e1abd
FH
307 block[0] = pixels[0];
308 block[1] = pixels[1];
309 block[2] = pixels[2];
310 block[3] = pixels[3];
311 block[4] = pixels[4];
312 block[5] = pixels[5];
313 block[6] = pixels[6];
314 block[7] = pixels[7];
315 pixels += line_size;
316 block += 8;
de6d9b64
FB
317 }
318}
319
0c1a9eda 320static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
bb270c08 321 const uint8_t *s2, int stride){
9dbcbd92
MN
322 int i;
323
324 /* read the pixels */
9dbcbd92 325 for(i=0;i<8;i++) {
c13e1abd
FH
326 block[0] = s1[0] - s2[0];
327 block[1] = s1[1] - s2[1];
328 block[2] = s1[2] - s2[2];
329 block[3] = s1[3] - s2[3];
330 block[4] = s1[4] - s2[4];
331 block[5] = s1[5] - s2[5];
332 block[6] = s1[6] - s2[6];
333 block[7] = s1[7] - s2[7];
9dbcbd92
MN
334 s1 += stride;
335 s2 += stride;
c13e1abd 336 block += 8;
9dbcbd92
MN
337 }
338}
339
340
484a337c
RB
341void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
342 int line_size)
de6d9b64 343{
de6d9b64 344 int i;
55fde95e 345 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 346
de6d9b64 347 /* read the pixels */
de6d9b64 348 for(i=0;i<8;i++) {
c13e1abd
FH
349 pixels[0] = cm[block[0]];
350 pixels[1] = cm[block[1]];
351 pixels[2] = cm[block[2]];
352 pixels[3] = cm[block[3]];
353 pixels[4] = cm[block[4]];
354 pixels[5] = cm[block[5]];
355 pixels[6] = cm[block[6]];
356 pixels[7] = cm[block[7]];
357
358 pixels += line_size;
359 block += 8;
de6d9b64
FB
360 }
361}
362
178fcca8 363static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 364 int line_size)
178fcca8
MN
365{
366 int i;
55fde95e 367 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 368
178fcca8
MN
369 /* read the pixels */
370 for(i=0;i<4;i++) {
371 pixels[0] = cm[block[0]];
372 pixels[1] = cm[block[1]];
373 pixels[2] = cm[block[2]];
374 pixels[3] = cm[block[3]];
375
376 pixels += line_size;
377 block += 8;
378 }
379}
380
9ca358b9 381static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 382 int line_size)
9ca358b9
MN
383{
384 int i;
55fde95e 385 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 386
9ca358b9
MN
387 /* read the pixels */
388 for(i=0;i<2;i++) {
389 pixels[0] = cm[block[0]];
390 pixels[1] = cm[block[1]];
391
392 pixels += line_size;
393 block += 8;
394 }
395}
396
484a337c
RB
397void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
398 uint8_t *restrict pixels,
399 int line_size)
f9ed9d85
MM
400{
401 int i, j;
402
403 for (i = 0; i < 8; i++) {
404 for (j = 0; j < 8; j++) {
405 if (*block < -128)
406 *pixels = 0;
407 else if (*block > 127)
408 *pixels = 255;
409 else
410 *pixels = (uint8_t)(*block + 128);
411 block++;
412 pixels++;
413 }
414 pixels += (line_size - 8);
415 }
416}
417
342c7dfd
KS
418static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
419 int line_size)
420{
421 int i;
422
423 /* read the pixels */
424 for(i=0;i<8;i++) {
425 pixels[0] = block[0];
426 pixels[1] = block[1];
427 pixels[2] = block[2];
428 pixels[3] = block[3];
429 pixels[4] = block[4];
430 pixels[5] = block[5];
431 pixels[6] = block[6];
432 pixels[7] = block[7];
433
434 pixels += line_size;
435 block += 8;
436 }
437}
438
484a337c
RB
439void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
440 int line_size)
de6d9b64 441{
de6d9b64 442 int i;
55fde95e 443 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 444
de6d9b64 445 /* read the pixels */
de6d9b64 446 for(i=0;i<8;i++) {
c13e1abd
FH
447 pixels[0] = cm[pixels[0] + block[0]];
448 pixels[1] = cm[pixels[1] + block[1]];
449 pixels[2] = cm[pixels[2] + block[2]];
450 pixels[3] = cm[pixels[3] + block[3]];
451 pixels[4] = cm[pixels[4] + block[4]];
452 pixels[5] = cm[pixels[5] + block[5]];
453 pixels[6] = cm[pixels[6] + block[6]];
454 pixels[7] = cm[pixels[7] + block[7]];
455 pixels += line_size;
456 block += 8;
de6d9b64
FB
457 }
458}
178fcca8
MN
459
460static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
461 int line_size)
462{
463 int i;
55fde95e 464 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 465
178fcca8
MN
466 /* read the pixels */
467 for(i=0;i<4;i++) {
468 pixels[0] = cm[pixels[0] + block[0]];
469 pixels[1] = cm[pixels[1] + block[1]];
470 pixels[2] = cm[pixels[2] + block[2]];
471 pixels[3] = cm[pixels[3] + block[3]];
472 pixels += line_size;
473 block += 8;
474 }
475}
9ca358b9
MN
476
477static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
478 int line_size)
479{
480 int i;
55fde95e 481 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 482
9ca358b9
MN
483 /* read the pixels */
484 for(i=0;i<2;i++) {
485 pixels[0] = cm[pixels[0] + block[0]];
486 pixels[1] = cm[pixels[1] + block[1]];
487 pixels += line_size;
488 block += 8;
489 }
490}
36940eca 491
1edbfe19
LM
492static int sum_abs_dctelem_c(DCTELEM *block)
493{
494 int sum=0, i;
495 for(i=0; i<64; i++)
496 sum+= FFABS(block[i]);
497 return sum;
498}
499
342c7dfd
KS
/**
 * Fill a 16-byte-wide, h-row area with a constant value.
 *
 * @param block     top-left byte of the area
 * @param value     byte to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
509
/**
 * Fill an 8-byte-wide, h-row area with a constant value.
 *
 * @param block     top-left byte of the area
 * @param value     byte to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
519
/**
 * Upscale an 8x8 block to 16x16 by duplicating every source pixel into a
 * 2x2 square.
 *
 * The previous implementation stored each doubled pixel through a uint16_t
 * pointer cast of dst, which violates the aliasing rules and performs
 * misaligned stores when dst or linesize is odd. Storing the two bytes
 * individually produces the same output on any endianness, since both
 * bytes are equal.
 *
 * @param src      64 source pixels, 8 per row
 * @param dst      top-left destination pixel; 16 rows of 16 bytes are written
 * @param linesize destination row stride in bytes
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* Source row `row` feeds destination rows 2*row and 2*row + 1. */
        uint8_t *even = dst + 2 * row * linesize;
        uint8_t *odd  = even + linesize;

        for (col = 0; col < 8; col++) {
            const uint8_t v = src[8 * row + col];

            even[2 * col] = even[2 * col + 1] = v;
            odd[2 * col]  = odd[2 * col + 1]  = v;
        }
    }
}
535
de6d9b64
FB
/* Rounded integer averages of two and four values. Every argument is
 * parenthesized so that arguments containing low-precedence operators
 * (e.g. `x ^ y`, `x & y`) are averaged as a unit. Arguments are evaluated
 * exactly once each. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
538
/**
 * Bilinear interpolation of an 8-pixel-wide, h-row area at a 1/16-pel
 * offset. The four weights sum to 256 and the result is shifted right by 8.
 *
 * @param dst     destination, written 8 pixels per row
 * @param src     source; column 8 and the row below each output row are read
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows
 * @param x16     horizontal weight of the right neighbour, in 1/16 units
 * @param y16     vertical weight of the lower neighbour, in 1/16 units
 * @param rounder value added before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = x16 * (16 - y16);
    const int w_bl = (16 - x16) * y16;
    const int w_br = x16 * y16;
    int row, col;

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
561
/**
 * Global motion compensation for an 8-pixel-wide, h-row area.
 *
 * For every output pixel a source position is derived from (vx, vy):
 * after >>16 the low `shift` bits are the interpolation fraction and the
 * remaining bits the integer source coordinate. (vx, vy) advance by
 * (dxx, dyx) per output pixel, and the row start (ox, oy) by (dxy, dyy)
 * per output row. Coordinates outside the picture are clipped, and
 * interpolation is dropped along each clipped axis.
 *
 * @param dst    destination
 * @param src    source picture
 * @param stride row stride in bytes, shared by dst and src
 * @param h      number of output rows
 * @param shift  number of fractional bits in the sampled coordinates
 * @param r      rounding term added before the final shift by 2*shift
 * @param width  source width; decremented before use as the x limit
 * @param height source height; decremented before use as the y limit
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s = 1 << shift;
    int y;

    width--;
    height--;

    for (y = 0; y < h; y++) {
        int vx = ox;
        int vy = oy;
        int x;

        for (x = 0; x < 8; x++) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & (s - 1);
            const int frac_y = src_y & (s - 1);
            int in_x, in_y, index;

            src_x >>= shift;
            src_y >>= shift;

            /* The unsigned compare rejects negative coordinates as well. */
            in_x = (unsigned)src_x < width;
            in_y = (unsigned)src_y < height;

            if (in_x && in_y) {
                /* Fully inside: bilinear interpolation in both directions. */
                int top, bottom;

                index  = src_x + src_y * stride;
                top    = src[index]          * (s - frac_x) + src[index + 1]          * frac_x;
                bottom = src[index + stride] * (s - frac_x) + src[index + stride + 1] * frac_x;
                dst[y * stride + x] = (top * (s - frac_y) + bottom * frac_y + r) >> (shift * 2);
            } else if (in_x) {
                /* y clipped: horizontal interpolation only. */
                index = src_x + av_clip(src_y, 0, height) * stride;
                dst[y * stride + x] = ((src[index]     * (s - frac_x)
                                      + src[index + 1] * frac_x) * s
                                      + r) >> (shift * 2);
            } else if (in_y) {
                /* x clipped: vertical interpolation only. */
                index = av_clip(src_x, 0, width) + src_y * stride;
                dst[y * stride + x] = ((src[index]          * (s - frac_y)
                                      + src[index + stride] * frac_y) * s
                                      + r) >> (shift * 2);
            } else {
                /* Both clipped: nearest edge pixel, no interpolation. */
                index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height) * stride;
                dst[y * stride + x] = src[index];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
619
/* Full-pel copy: dispatch to the fixed-width put_pixels routine for widths
 * 2, 4, 8 and 16. Any other width does nothing. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
628
/* Third-pel interpolation, horizontal weights 2:1 (current : right).
 * 683/2048 approximates 1/3, so each output is about (2*a + b + 1) / 3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + src[col + 1] + 1;
            dst[col] = (683 * sum) >> 11;
        }
    }
}
639
/* Third-pel interpolation, horizontal weights 1:2 (current : right).
 * 683/2048 approximates 1/3, so each output is about (a + 2*b + 1) / 3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * src[col + 1] + 1;
            dst[col] = (683 * sum) >> 11;
        }
    }
}
115329f1 650
669ac79c
MN
/* Third-pel interpolation, vertical weights 2:1 (current : below).
 * 683/2048 approximates 1/3, so each output is about (2*a + c + 1) / 3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + below[col] + 1;
            dst[col] = (683 * sum) >> 11;
        }
    }
}
115329f1 661
669ac79c
MN
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 4 (current), 3 (right), 3 (below), 2 (below-right); 2731/32768
 * approximates 1/12, the reciprocal of the weight sum. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 4 * src[col] + 3 * src[col + 1] + 3 * below[col] + 2 * below[col + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
    }
}
672
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 3 (current), 2 (right), 4 (below), 3 (below-right); 2731/32768
 * approximates 1/12, the reciprocal of the weight sum. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 3 * src[col] + 2 * src[col + 1] + 4 * below[col] + 3 * below[col + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
    }
}
683
/* Third-pel interpolation, vertical weights 1:2 (current : below).
 * 683/2048 approximates 1/3, so each output is about (a + 2*c + 1) / 3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * below[col] + 1;
            dst[col] = (683 * sum) >> 11;
        }
    }
}
694
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 3 (current), 4 (right), 2 (below), 3 (below-right); 2731/32768
 * approximates 1/12, the reciprocal of the weight sum. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 3 * src[col] + 4 * src[col + 1] + 2 * below[col] + 3 * below[col + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
    }
}
705
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 2 (current), 3 (right), 3 (below), 4 (below-right); 2731/32768
 * approximates 1/12, the reciprocal of the weight sum. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + 3 * src[col + 1] + 3 * below[col] + 4 * below[col + 1] + 6;
            dst[col] = (2731 * sum) >> 15;
        }
    }
}
da3b9756
MM
716
/* Full-pel averaging: dispatch to the fixed-width avg_pixels routine for
 * widths 2, 4, 8 and 16. Any other width does nothing. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
725
/* Third-pel interpolation, horizontal weights 2:1 (current : right),
 * then rounded average with the existing destination pixel. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
736
/* Third-pel interpolation, horizontal weights 1:2 (current : right),
 * then rounded average with the existing destination pixel. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
115329f1 747
da3b9756
MM
/* Third-pel interpolation, vertical weights 2:1 (current : below),
 * then rounded average with the existing destination pixel. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = (683 * (2 * src[col] + below[col] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
115329f1 758
da3b9756
MM
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 4 (current), 3 (right), 3 (below), 2 (below-right), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = (2731 * (4 * src[col] + 3 * src[col + 1] + 3 * below[col] + 2 * below[col + 1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
769
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 3 (current), 2 (right), 4 (below), 3 (below-right), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = (2731 * (3 * src[col] + 2 * src[col + 1] + 4 * below[col] + 3 * below[col + 1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
780
/* Third-pel interpolation, vertical weights 1:2 (current : below),
 * then rounded average with the existing destination pixel. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2 * below[col] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
791
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 3 (current), 4 (right), 2 (below), 3 (below-right), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = (2731 * (3 * src[col] + 4 * src[col + 1] + 2 * below[col] + 3 * below[col + 1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
802
/* Third-pel interpolation over a 2x2 neighbourhood with weights
 * 2 (current), 3 (right), 3 (below), 4 (below-right), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = (2731 * (2 * src[col] + 3 * src[col + 1] + 3 * below[col] + 4 * below[col + 1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
669ac79c
MN
813#if 0
814#define TPEL_WIDTH(width)\
815static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
816 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
817static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
818 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
819static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
820 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
821static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
822 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
823static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
824 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
825static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
826 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
827static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
828 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
829static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
830 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
831static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
832 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
833#endif
834
b3184779 835#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda 836static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 837 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
838 int i;\
839 for(i=0; i<h; i++)\
840 {\
841 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
842 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
843 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
844 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
845 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
846 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
847 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
848 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
849 dst+=dstStride;\
850 src+=srcStride;\
851 }\
44eb4951
MN
852}\
853\
0c1a9eda 854static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 855 const int w=8;\
55fde95e 856 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
857 int i;\
858 for(i=0; i<w; i++)\
859 {\
860 const int src0= src[0*srcStride];\
861 const int src1= src[1*srcStride];\
862 const int src2= src[2*srcStride];\
863 const int src3= src[3*srcStride];\
864 const int src4= src[4*srcStride];\
865 const int src5= src[5*srcStride];\
866 const int src6= src[6*srcStride];\
867 const int src7= src[7*srcStride];\
868 const int src8= src[8*srcStride];\
869 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
870 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
871 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
872 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
873 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
874 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
875 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
876 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
877 dst++;\
878 src++;\
879 }\
880}\
881\
0c1a9eda 882static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 883 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 884 int i;\
826f429a 885 \
b3184779
MN
886 for(i=0; i<h; i++)\
887 {\
888 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
889 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
890 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
891 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
892 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
893 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
894 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
895 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
896 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
897 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
898 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
899 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
900 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
901 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
902 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
903 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
904 dst+=dstStride;\
905 src+=srcStride;\
906 }\
907}\
908\
0c1a9eda 909static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
55fde95e 910 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 911 int i;\
826f429a 912 const int w=16;\
b3184779
MN
913 for(i=0; i<w; i++)\
914 {\
915 const int src0= src[0*srcStride];\
916 const int src1= src[1*srcStride];\
917 const int src2= src[2*srcStride];\
918 const int src3= src[3*srcStride];\
919 const int src4= src[4*srcStride];\
920 const int src5= src[5*srcStride];\
921 const int src6= src[6*srcStride];\
922 const int src7= src[7*srcStride];\
923 const int src8= src[8*srcStride];\
924 const int src9= src[9*srcStride];\
925 const int src10= src[10*srcStride];\
926 const int src11= src[11*srcStride];\
927 const int src12= src[12*srcStride];\
928 const int src13= src[13*srcStride];\
929 const int src14= src[14*srcStride];\
930 const int src15= src[15*srcStride];\
931 const int src16= src[16*srcStride];\
932 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
933 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
934 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
935 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
936 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
937 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
938 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
939 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
940 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
941 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
942 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
943 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
944 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
945 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
946 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
947 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
948 dst++;\
949 src++;\
950 }\
951}\
952\
0c1a9eda
ZK
953static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
954 uint8_t half[64];\
b3184779
MN
955 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
956 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
957}\
958\
0c1a9eda 959static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 960 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
961}\
962\
0c1a9eda
ZK
963static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
964 uint8_t half[64];\
b3184779
MN
965 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
966 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
967}\
968\
0c1a9eda
ZK
969static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
970 uint8_t full[16*9];\
971 uint8_t half[64];\
b3184779 972 copy_block9(full, src, 16, stride, 9);\
db794953 973 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 974 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
975}\
976\
0c1a9eda
ZK
977static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
978 uint8_t full[16*9];\
b3184779 979 copy_block9(full, src, 16, stride, 9);\
db794953 980 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
981}\
982\
0c1a9eda
ZK
983static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
984 uint8_t full[16*9];\
985 uint8_t half[64];\
b3184779 986 copy_block9(full, src, 16, stride, 9);\
db794953 987 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 988 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 989}\
0c1a9eda
ZK
990void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
991 uint8_t full[16*9];\
992 uint8_t halfH[72];\
993 uint8_t halfV[64];\
994 uint8_t halfHV[64];\
b3184779
MN
995 copy_block9(full, src, 16, stride, 9);\
996 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
997 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
998 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 999 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1000}\
0c1a9eda
ZK
1001static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1002 uint8_t full[16*9];\
1003 uint8_t halfH[72];\
1004 uint8_t halfHV[64];\
db794953
MN
1005 copy_block9(full, src, 16, stride, 9);\
1006 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1007 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1008 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1009 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1010}\
0c1a9eda
ZK
1011void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1012 uint8_t full[16*9];\
1013 uint8_t halfH[72];\
1014 uint8_t halfV[64];\
1015 uint8_t halfHV[64];\
b3184779
MN
1016 copy_block9(full, src, 16, stride, 9);\
1017 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1018 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1019 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1020 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1021}\
0c1a9eda
ZK
1022static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1023 uint8_t full[16*9];\
1024 uint8_t halfH[72];\
1025 uint8_t halfHV[64];\
db794953
MN
1026 copy_block9(full, src, 16, stride, 9);\
1027 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1028 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1029 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1030 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1031}\
0c1a9eda
ZK
1032void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1033 uint8_t full[16*9];\
1034 uint8_t halfH[72];\
1035 uint8_t halfV[64];\
1036 uint8_t halfHV[64];\
b3184779
MN
1037 copy_block9(full, src, 16, stride, 9);\
1038 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1039 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1040 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1041 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1042}\
0c1a9eda
ZK
1043static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1044 uint8_t full[16*9];\
1045 uint8_t halfH[72];\
1046 uint8_t halfHV[64];\
db794953
MN
1047 copy_block9(full, src, 16, stride, 9);\
1048 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1049 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1050 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1051 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1052}\
0c1a9eda
ZK
1053void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1054 uint8_t full[16*9];\
1055 uint8_t halfH[72];\
1056 uint8_t halfV[64];\
1057 uint8_t halfHV[64];\
b3184779
MN
1058 copy_block9(full, src, 16, stride, 9);\
1059 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1060 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1061 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1062 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1063}\
0c1a9eda
ZK
1064static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1065 uint8_t full[16*9];\
1066 uint8_t halfH[72];\
1067 uint8_t halfHV[64];\
db794953
MN
1068 copy_block9(full, src, 16, stride, 9);\
1069 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1070 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1071 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1072 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1073}\
0c1a9eda
ZK
1074static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1075 uint8_t halfH[72];\
1076 uint8_t halfHV[64];\
b3184779 1077 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1078 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1079 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1080}\
0c1a9eda
ZK
1081static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1082 uint8_t halfH[72];\
1083 uint8_t halfHV[64];\
b3184779 1084 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1085 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1086 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1087}\
0c1a9eda
ZK
1088void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1089 uint8_t full[16*9];\
1090 uint8_t halfH[72];\
1091 uint8_t halfV[64];\
1092 uint8_t halfHV[64];\
b3184779
MN
1093 copy_block9(full, src, 16, stride, 9);\
1094 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1095 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1096 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1097 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1098}\
0c1a9eda
ZK
1099static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1100 uint8_t full[16*9];\
1101 uint8_t halfH[72];\
db794953
MN
1102 copy_block9(full, src, 16, stride, 9);\
1103 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1104 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1105 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1106}\
0c1a9eda
ZK
1107void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1108 uint8_t full[16*9];\
1109 uint8_t halfH[72];\
1110 uint8_t halfV[64];\
1111 uint8_t halfHV[64];\
b3184779
MN
1112 copy_block9(full, src, 16, stride, 9);\
1113 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1114 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1115 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1116 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1117}\
0c1a9eda
ZK
1118static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1119 uint8_t full[16*9];\
1120 uint8_t halfH[72];\
db794953
MN
1121 copy_block9(full, src, 16, stride, 9);\
1122 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1123 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1124 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1125}\
0c1a9eda
ZK
1126static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1127 uint8_t halfH[72];\
b3184779 1128 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1129 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1130}\
b3184779 1131\
0c1a9eda
ZK
1132static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1133 uint8_t half[256];\
b3184779
MN
1134 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1135 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1136}\
1137\
0c1a9eda 1138static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1139 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1140}\
b3184779 1141\
0c1a9eda
ZK
1142static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1143 uint8_t half[256];\
b3184779
MN
1144 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1145 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1146}\
1147\
0c1a9eda
ZK
1148static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1149 uint8_t full[24*17];\
1150 uint8_t half[256];\
b3184779 1151 copy_block17(full, src, 24, stride, 17);\
826f429a 1152 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1153 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1154}\
1155\
0c1a9eda
ZK
1156static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1157 uint8_t full[24*17];\
b3184779 1158 copy_block17(full, src, 24, stride, 17);\
826f429a 1159 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1160}\
1161\
0c1a9eda
ZK
1162static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1163 uint8_t full[24*17];\
1164 uint8_t half[256];\
b3184779 1165 copy_block17(full, src, 24, stride, 17);\
826f429a 1166 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
1167 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1168}\
0c1a9eda
ZK
1169void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1170 uint8_t full[24*17];\
1171 uint8_t halfH[272];\
1172 uint8_t halfV[256];\
1173 uint8_t halfHV[256];\
b3184779
MN
1174 copy_block17(full, src, 24, stride, 17);\
1175 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1176 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1177 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1178 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1179}\
0c1a9eda
ZK
1180static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1181 uint8_t full[24*17];\
1182 uint8_t halfH[272];\
1183 uint8_t halfHV[256];\
db794953
MN
1184 copy_block17(full, src, 24, stride, 17);\
1185 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1186 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1187 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1188 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1189}\
0c1a9eda
ZK
1190void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1191 uint8_t full[24*17];\
1192 uint8_t halfH[272];\
1193 uint8_t halfV[256];\
1194 uint8_t halfHV[256];\
b3184779
MN
1195 copy_block17(full, src, 24, stride, 17);\
1196 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1197 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1198 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1199 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1200}\
0c1a9eda
ZK
1201static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1202 uint8_t full[24*17];\
1203 uint8_t halfH[272];\
1204 uint8_t halfHV[256];\
db794953
MN
1205 copy_block17(full, src, 24, stride, 17);\
1206 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1207 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1208 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1209 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1210}\
0c1a9eda
ZK
1211void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1212 uint8_t full[24*17];\
1213 uint8_t halfH[272];\
1214 uint8_t halfV[256];\
1215 uint8_t halfHV[256];\
b3184779
MN
1216 copy_block17(full, src, 24, stride, 17);\
1217 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1218 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1219 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1220 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1221}\
0c1a9eda
ZK
1222static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1223 uint8_t full[24*17];\
1224 uint8_t halfH[272];\
1225 uint8_t halfHV[256];\
db794953
MN
1226 copy_block17(full, src, 24, stride, 17);\
1227 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1228 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1229 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1230 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1231}\
0c1a9eda
ZK
1232void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1233 uint8_t full[24*17];\
1234 uint8_t halfH[272];\
1235 uint8_t halfV[256];\
1236 uint8_t halfHV[256];\
b3184779
MN
1237 copy_block17(full, src, 24, stride, 17);\
1238 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1239 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1240 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1241 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1242}\
0c1a9eda
ZK
1243static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1244 uint8_t full[24*17];\
1245 uint8_t halfH[272];\
1246 uint8_t halfHV[256];\
db794953
MN
1247 copy_block17(full, src, 24, stride, 17);\
1248 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1249 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1250 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1251 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1252}\
0c1a9eda
ZK
1253static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1254 uint8_t halfH[272];\
1255 uint8_t halfHV[256];\
b3184779 1256 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1257 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1258 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1259}\
0c1a9eda
ZK
1260static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1261 uint8_t halfH[272];\
1262 uint8_t halfHV[256];\
b3184779 1263 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1264 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1265 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1266}\
0c1a9eda
ZK
1267void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1268 uint8_t full[24*17];\
1269 uint8_t halfH[272];\
1270 uint8_t halfV[256];\
1271 uint8_t halfHV[256];\
b3184779
MN
1272 copy_block17(full, src, 24, stride, 17);\
1273 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1274 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1275 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1276 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1277}\
0c1a9eda
ZK
1278static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1279 uint8_t full[24*17];\
1280 uint8_t halfH[272];\
db794953
MN
1281 copy_block17(full, src, 24, stride, 17);\
1282 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1283 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1284 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1285}\
0c1a9eda
ZK
1286void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1287 uint8_t full[24*17];\
1288 uint8_t halfH[272];\
1289 uint8_t halfV[256];\
1290 uint8_t halfHV[256];\
b3184779
MN
1291 copy_block17(full, src, 24, stride, 17);\
1292 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1293 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1294 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
1295 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1296}\
0c1a9eda
ZK
1297static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1298 uint8_t full[24*17];\
1299 uint8_t halfH[272];\
db794953
MN
1300 copy_block17(full, src, 24, stride, 17);\
1301 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1302 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1303 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1304}\
0c1a9eda
ZK
1305static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1306 uint8_t halfH[272];\
b3184779 1307 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1308 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1309}
44eb4951 1310
b3184779
MN
1311#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1312#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1313#define op_put(a, b) a = cm[((b) + 16)>>5]
1314#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1315
1316QPEL_MC(0, put_ , _ , op_put)
1317QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1318QPEL_MC(0, avg_ , _ , op_avg)
1319//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1320#undef op_avg
1321#undef op_avg_no_rnd
1322#undef op_put
1323#undef op_put_no_rnd
44eb4951 1324
/*
 * The mc00 entries are not generated by QPEL_MC; they are aliased to the
 * ff_{put,avg}_pixels NxN functions. The no_rnd variants map onto the same
 * put functions as the rounding ones.
 */
#define put_qpel8_mc00_c         ff_put_pixels8x8_c
#define avg_qpel8_mc00_c         ff_avg_pixels8x8_c
#define put_qpel16_mc00_c        ff_put_pixels16x16_c
#define avg_qpel16_mc00_c        ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1331
1457ab52 1332static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
55fde95e 1333 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1334 int i;
1335
1336 for(i=0; i<h; i++){
1337 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1338 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1339 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1340 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1341 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1342 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1343 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1344 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1345 dst+=dstStride;
115329f1 1346 src+=srcStride;
1457ab52
MN
1347 }
1348}
1349
#if CONFIG_RV40_DECODER
/*
 * RV40 mc33 entries: each one forwards to the matching put/avg xy2 pixel
 * function with the block height equal to the block width.
 */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}

static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}

static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}

static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1364
1457ab52 1365static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
55fde95e 1366 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1367 int i;
1368
1369 for(i=0; i<w; i++){
1370 const int src_1= src[ -srcStride];
1371 const int src0 = src[0 ];
1372 const int src1 = src[ srcStride];
1373 const int src2 = src[2*srcStride];
1374 const int src3 = src[3*srcStride];
1375 const int src4 = src[4*srcStride];
1376 const int src5 = src[5*srcStride];
1377 const int src6 = src[6*srcStride];
1378 const int src7 = src[7*srcStride];
1379 const int src8 = src[8*srcStride];
1380 const int src9 = src[9*srcStride];
1381 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1382 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1383 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1384 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1385 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1386 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1387 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1388 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1389 src++;
1390 dst++;
1391 }
1392}
1393
/**
 * mspel mc10: filter the 8x8 block horizontally into a temporary, then
 * combine that result with the unshifted source through put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* 8x8 horizontally filtered block, stride 8 */

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
1399
/**
 * mspel mc20: the horizontally filtered 8x8 block is written straight to dst.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1403
/**
 * mspel mc30: filter the 8x8 block horizontally into a temporary, then
 * combine that result with the source shifted one pixel to the right
 * (src+1) through put_pixels8_l2().
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* 8x8 horizontally filtered block, stride 8 */

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}
1409
/**
 * mspel mc02: the vertically filtered 8x8 block is written straight to dst.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1413
/**
 * mspel mc12: combine the vertically filtered source with the
 * horizontally-then-vertically filtered source through put_pixels8_l2().
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows of 8: source rows -1..9 filtered horizontally */
    uint8_t halfV[64];
    uint8_t halfHV[64];

    /* Start one row above the block: the vertical filter reads rows -1..9. */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    /* halfH+8 is row 0 of the block inside the 11-row temporary. */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel mc32: like mc12, but the purely vertical filter runs on the source
 * shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows of 8: source rows -1..9 filtered horizontally */
    uint8_t halfV[64];
    uint8_t halfHV[64];

    /* Start one row above the block: the vertical filter reads rows -1..9. */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    /* halfH+8 is row 0 of the block inside the 11-row temporary. */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/**
 * mspel mc22: horizontal filter into an 11-row temporary, then vertical
 * filter of that temporary straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];  /* 11 rows of 8: source rows -1..9 filtered horizontally */

    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    /* halfH+8 is row 0 of the block inside the 11-row temporary. */
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1437
332f9ac4 1438static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1439 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1440 int x;
1441 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1442
332f9ac4
MN
1443 for(x=0; x<8; x++){
1444 int d1, d2, ad1;
1445 int p0= src[x-2*stride];
1446 int p1= src[x-1*stride];
1447 int p2= src[x+0*stride];
1448 int p3= src[x+1*stride];
1449 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1450
1451 if (d<-2*strength) d1= 0;
1452 else if(d<- strength) d1=-2*strength - d;
1453 else if(d< strength) d1= d;
1454 else if(d< 2*strength) d1= 2*strength - d;
1455 else d1= 0;
115329f1 1456
332f9ac4
MN
1457 p1 += d1;
1458 p2 -= d1;
1459 if(p1&256) p1= ~(p1>>31);
1460 if(p2&256) p2= ~(p2>>31);
115329f1 1461
332f9ac4
MN
1462 src[x-1*stride] = p1;
1463 src[x+0*stride] = p2;
1464
c26abfa5 1465 ad1= FFABS(d1)>>1;
115329f1 1466
f66e4f5f 1467 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1468
332f9ac4
MN
1469 src[x-2*stride] = p0 - d2;
1470 src[x+ stride] = p3 + d2;
1471 }
73f51a4d 1472 }
332f9ac4
MN
1473}
1474
1475static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1476 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1477 int y;
1478 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1479
332f9ac4
MN
1480 for(y=0; y<8; y++){
1481 int d1, d2, ad1;
1482 int p0= src[y*stride-2];
1483 int p1= src[y*stride-1];
1484 int p2= src[y*stride+0];
1485 int p3= src[y*stride+1];
1486 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1487
1488 if (d<-2*strength) d1= 0;
1489 else if(d<- strength) d1=-2*strength - d;
1490 else if(d< strength) d1= d;
1491 else if(d< 2*strength) d1= 2*strength - d;
1492 else d1= 0;
115329f1 1493
332f9ac4
MN
1494 p1 += d1;
1495 p2 -= d1;
1496 if(p1&256) p1= ~(p1>>31);
1497 if(p2&256) p2= ~(p2>>31);
115329f1 1498
332f9ac4
MN
1499 src[y*stride-1] = p1;
1500 src[y*stride+0] = p2;
1501
c26abfa5 1502 ad1= FFABS(d1)>>1;
115329f1 1503
f66e4f5f 1504 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1505
332f9ac4
MN
1506 src[y*stride-2] = p0 - d2;
1507 src[y*stride+1] = p3 + d2;
1508 }
73f51a4d 1509 }
332f9ac4 1510}
1457ab52 1511
/**
 * H.261 loop filter: separable (1, 2, 1) smoothing of an 8x8 block in place.
 *
 * A vertical pass writes 4x-scaled intermediates to a temporary; rows 0 and 7
 * are only scaled, not filtered. A horizontal pass then filters columns 1..6
 * with rounding (+8, >>4) and only rescales columns 0 and 7 (+2, >>2).
 *
 * @param src    top-left sample of the 8x8 block, modified in place
 * @param stride distance in bytes between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x, y;
    int temp[64];

    /* vertical pass, border rows */
    for (x = 0; x < 8; x++) {
        temp[x      ] = 4 * src[x           ];
        temp[x + 7*8] = 4 * src[x + 7*stride];
    }
    /* vertical pass, inner rows */
    for (y = 1; y < 7; y++) {
        const uint8_t *row = src  + y * stride;
        int           *out = temp + y * 8;

        for (x = 0; x < 8; x++)
            out[x] = row[x - stride] + 2 * row[x] + row[x + stride];
    }

    /* horizontal pass, reading only from temp */
    for (y = 0; y < 8; y++) {
        uint8_t   *row = src  + y * stride;
        const int *in  = temp + y * 8;

        row[0] = (in[0] + 2) >> 2;
        row[7] = (in[7] + 2) >> 2;
        for (x = 1; x < 7; x++)
            row[x] = (in[x-1] + 2 * in[x] + in[x+1] + 8) >> 4;
    }
}
1538
/**
 * Sum of absolute differences between two 16 pixel wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, applied to both blocks
 * @param h         number of rows to compare
 * @return the accumulated |pix1 - pix2| over 16*h samples
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1566
/**
 * Sum of absolute differences between a 16 pixel wide block and the avg2()
 * of each reference sample with its right-hand neighbour.
 *
 * Every row reads pix2[0]..pix2[16], i.e. 17 reference samples.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, applied to both blocks
 * @param h         number of rows to compare
 * @return the accumulated absolute difference over 16*h samples
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1594
/**
 * Sum of absolute differences between a 16 pixel wide block and the avg2()
 * of each reference sample with the sample directly below it.
 *
 * Reads h+1 rows of the reference block.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, applied to both blocks
 * @param h         number of rows to compare
 * @return the accumulated absolute difference over 16*h samples
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* reference row below pix2 */
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix3[x]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
1624
/**
 * Sum of absolute differences between a 16 pixel wide block and the avg4()
 * of each 2x2 neighbourhood of the reference block.
 *
 * Every row reads 17 samples from each of two adjacent reference rows, and
 * h+1 reference rows are read in total.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance in bytes between rows, applied to both blocks
 * @param h         number of rows to compare
 * @return the accumulated absolute difference over 16*h samples
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* reference row below pix2 */
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], pix3[x], pix3[x+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
1654
/**
 * Sum of absolute differences between two 8-pixel-wide, h-row blocks.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows of both buffers
 * @return accumulated absolute difference
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1674
/**
 * Sum of absolute differences over an 8-pixel-wide, h-row block between pix1
 * and the avg2() of each pix2 pixel with its right-hand neighbour.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows of both buffers
 * @return accumulated absolute difference
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1694
/**
 * Sum of absolute differences over an 8-pixel-wide, h-row block between pix1
 * and the avg2() of each pix2 pixel with the pixel one row below it.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows of both buffers
 * @return accumulated absolute difference
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint8_t *upper = pix2;
    const uint8_t *lower = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(upper[col], lower[col]));
        pix1  += line_size;
        upper += line_size;
        lower += line_size;
    }
    return sad;
}
1716
/**
 * Sum of absolute differences over an 8-pixel-wide, h-row block between pix1
 * and the avg4() of each 2x2 group of pix2 pixels (current/next column,
 * current/next row).
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows of both buffers
 * @return accumulated absolute difference
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint8_t *upper = pix2;
    const uint8_t *lower = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(upper[col], upper[col + 1],
                                        lower[col], lower[col + 1]));
        pix1  += line_size;
        upper += line_size;
        lower += line_size;
    }
    return sad;
}
1738
bf4e3bd2
MR
1739static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1740 MpegEncContext *c = v;
e6a2ac34
MN
1741 int score1=0;
1742 int score2=0;
1743 int x,y;
d4c5d2ad 1744
e6a2ac34
MN
1745 for(y=0; y<h; y++){
1746 for(x=0; x<16; x++){
1747 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1748 }
1749 if(y+1<h){
1750 for(x=0; x<15; x++){
c26abfa5 1751 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1752 - s1[x+1] + s1[x+1+stride])
c26abfa5 1753 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1754 - s2[x+1] + s2[x+1+stride]);
1755 }
1756 }
1757 s1+= stride;
1758 s2+= stride;
1759 }
d4c5d2ad 1760
c26abfa5
DB
1761 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1762 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1763}
1764
bf4e3bd2
MR
1765static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1766 MpegEncContext *c = v;
e6a2ac34
MN
1767 int score1=0;
1768 int score2=0;
1769 int x,y;
115329f1 1770
e6a2ac34
MN
1771 for(y=0; y<h; y++){
1772 for(x=0; x<8; x++){
1773 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1774 }
1775 if(y+1<h){
1776 for(x=0; x<7; x++){
c26abfa5 1777 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1778 - s1[x+1] + s1[x+1+stride])
c26abfa5 1779 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1780 - s2[x+1] + s2[x+1+stride]);
1781 }
1782 }
1783 s1+= stride;
1784 s2+= stride;
1785 }
115329f1 1786
c26abfa5
DB
1787 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1788 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1789}
1790
364a1797
MN
1791static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1792 int i;
1793 unsigned int sum=0;
1794
1795 for(i=0; i<8*8; i++){
1796 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1797 int w= weight[i];
1798 b>>= RECON_SHIFT;
1799 assert(-512<b && b<512);
1800
1801 sum += (w*b)*(w*b)>>4;
1802 }
1803 return sum>>2;
1804}
1805
1806static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1807 int i;
1808
1809 for(i=0; i<8*8; i++){
1810 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115329f1 1811 }
364a1797
MN
1812}
1813
a9badb51
MN
1814/**
1815 * permutes an 8x8 block.
2a5700de 1816 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
1817 * @param permutation the permutation vector
1818 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
115329f1 1819 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2a5700de 1820 * (inverse) permutated to scantable order!
a9badb51 1821 */
0c1a9eda 1822void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 1823{
7801d21d 1824 int i;
477ab036 1825 DCTELEM temp[64];
115329f1 1826
7801d21d 1827 if(last<=0) return;
90b5b51e 1828 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
d962f6fd 1829
7801d21d
MN
1830 for(i=0; i<=last; i++){
1831 const int j= scantable[i];
1832 temp[j]= block[j];
1833 block[j]=0;
1834 }
115329f1 1835
7801d21d
MN
1836 for(i=0; i<=last; i++){
1837 const int j= scantable[i];
1838 const int perm_j= permutation[j];
1839 block[perm_j]= temp[j];
1840 }
d962f6fd 1841}
e0eac44e 1842
622348f9
MN
/**
 * Comparison function that ignores all of its arguments and always
 * reports a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
1846
1847void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1848 int i;
115329f1 1849
3899eb2f 1850 memset(cmp, 0, sizeof(void*)*6);
115329f1 1851
3899eb2f 1852 for(i=0; i<6; i++){
622348f9
MN
1853 switch(type&0xFF){
1854 case FF_CMP_SAD:
1855 cmp[i]= c->sad[i];
1856 break;
1857 case FF_CMP_SATD:
1858 cmp[i]= c->hadamard8_diff[i];
1859 break;
1860 case FF_CMP_SSE:
1861 cmp[i]= c->sse[i];
1862 break;
1863 case FF_CMP_DCT:
1864 cmp[i]= c->dct_sad[i];
1865 break;
27c61ac5
MN
1866 case FF_CMP_DCT264:
1867 cmp[i]= c->dct264_sad[i];
1868 break;
0fd6aea1
MN
1869 case FF_CMP_DCTMAX:
1870 cmp[i]= c->dct_max[i];
1871 break;
622348f9
MN
1872 case FF_CMP_PSNR:
1873 cmp[i]= c->quant_psnr[i];
1874 break;
1875 case FF_CMP_BIT:
1876 cmp[i]= c->bit[i];
1877 break;
1878 case FF_CMP_RD:
1879 cmp[i]= c->rd[i];
1880 break;
1881 case FF_CMP_VSAD:
1882 cmp[i]= c->vsad[i];
1883 break;
1884 case FF_CMP_VSSE:
1885 cmp[i]= c->vsse[i];
1886 break;
1887 case FF_CMP_ZERO:
1888 cmp[i]= zero_cmp;
1889 break;
e6a2ac34
MN
1890 case FF_CMP_NSSE:
1891 cmp[i]= c->nsse[i];
1892 break;
05aec7bb 1893#if CONFIG_DWT
26efc54e
MN
1894 case FF_CMP_W53:
1895 cmp[i]= c->w53[i];
1896 break;
1897 case FF_CMP_W97:
1898 cmp[i]= c->w97[i];
1899 break;
3a6fc8fa 1900#endif
622348f9
MN
1901 default:
1902 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1903 }
1904 }
1905}
1906
11f18faf 1907static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
469bd7b1
LM
1908 long i;
1909 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1910 long a = *(long*)(src+i);
1911 long b = *(long*)(dst+i);
1912 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
11f18faf
MN
1913 }
1914 for(; i<w; i++)
1915 dst[i+0] += src[i+0];
1916}
1917
4a9ca0a2 1918static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
469bd7b1 1919 long i;
4a9ca0a2
LM
1920 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1921 long a = *(long*)(src1+i);
1922 long b = *(long*)(src2+i);
469bd7b1 1923 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
4a9ca0a2
LM
1924 }
1925 for(; i<w; i++)
1926 dst[i] = src1[i]+src2[i];
1927}
1928
11f18faf 1929static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
469bd7b1 1930 long i;
b250f9c6 1931#if !HAVE_FAST_UNALIGNED
469bd7b1 1932 if((long)src2 & (sizeof(long)-1)){
31304587
LM
1933 for(i=0; i+7<w; i+=8){
1934 dst[i+0] = src1[i+0]-src2[i+0];
1935 dst[i+1] = src1[i+1]-src2[i+1];
1936 dst[i+2] = src1[i+2]-src2[i+2];
1937 dst[i+3] = src1[i+3]-src2[i+3];
1938 dst[i+4] = src1[i+4]-src2[i+4];
1939 dst[i+5] = src1[i+5]-src2[i+5];
1940 dst[i+6] = src1[i+6]-src2[i+6];
1941 dst[i+7] = src1[i+7]-src2[i+7];
1942 }
469bd7b1
LM
1943 }else
1944#endif
1945 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1946 long a = *(long*)(src1+i);
1947 long b = *(long*)(src2+i);
1948 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1949 }
11f18faf
MN
1950 for(; i<w; i++)
1951 dst[i+0] = src1[i+0]-src2[i+0];
1952}
1953
/**
 * Reconstructs a row from median-predicted residuals: each output byte is
 * mid_pred(left, top, (left + top - top_left) & 0xFF) + diff[i], truncated
 * to 8 bits, where src1 supplies the "top" values.
 *
 * @param left     in: value left of the first byte; out: last written byte
 * @param left_top in: top-left value for the first byte; out: last src1 byte
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top)
{
    uint8_t prev     = *left;
    uint8_t prev_top = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top = src1[x];

        prev     = mid_pred(prev, top, (prev + top - prev_top) & 0xFF) + diff[x];
        prev_top = top;
        dst[x]   = prev;
    }

    *left     = prev;
    *left_top = prev_top;
}
1970
/**
 * Computes median-prediction residuals for a row: dst[i] is src2[i] minus
 * mid_pred(left, top, (left + top - top_left) & 0xFF), where src1 supplies
 * the "top" values and src2 the row being coded.
 *
 * @param left     in: value left of the first byte; out: last src2 byte
 * @param left_top in: top-left value for the first byte; out: last src1 byte
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
{
    uint8_t prev     = *left;
    uint8_t prev_top = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top      = src1[x];
        const int     expected = mid_pred(prev, top, (prev + top - prev_top) & 0xFF);

        prev_top = top;
        prev     = src2[x];
        dst[x]   = prev - expected;
    }

    *left     = prev;
    *left_top = prev_top;
}
1988
/**
 * Running-sum (left prediction) reconstruction: acc is increased by each
 * source byte in turn and its low 8 bits are stored to dst.
 *
 * @param acc initial accumulator value
 * @return the final accumulator (not truncated to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc)
{
    const uint8_t *in  = src;
    uint8_t       *out = dst;
    int remaining;

    for (remaining = w; remaining > 0; remaining--) {
        acc   += *in++;
        *out++ = acc;
    }

    return acc;
}
2007
/* byte offsets of the four channels inside one 4-byte pixel */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-channel running-sum (left prediction) reconstruction of w 4-byte
 * pixels: every channel keeps its own accumulator, which is increased by
 * the source byte and whose low 8 bits are stored to dst.
 *
 * @param red,green,blue,alpha in: initial accumulators; out: final
 *                             accumulators (not truncated to 8 bits)
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
{
    int acc_r = *red;
    int acc_g = *green;
    int acc_b = *blue;
    int acc_a = *alpha;
    int px;

    for (px = 0; px < w; px++) {
        const uint8_t *in  = src + 4 * px;
        uint8_t       *out = dst + 4 * px;

        acc_b += in[B];
        acc_g += in[G];
        acc_r += in[R];
        acc_a += in[A];

        out[B] = acc_b;
        out[G] = acc_g;
        out[R] = acc_r;
        out[A] = acc_a;
    }

    *red   = acc_r;
    *green = acc_g;
    *blue  = acc_b;
    *alpha = acc_a;
}
#undef B
#undef G
#undef R
#undef A
73c6f598 2048
1457ab52
MN
/**
 * Butterfly helpers for the Hadamard transforms below.
 * BUTTERFLY2 and BUTTERFLY1 are wrapped in do { } while (0) so that each
 * expands to exactly one statement and is safe in unbraced if/else bodies;
 * invoke them with a trailing semicolon.
 */

/** o1 = i1 + i2, o2 = i1 - i2 (i1 and i2 are evaluated twice). */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
} while (0)

/** In-place butterfly: x becomes x+y and y becomes x-y (old values). */
#define BUTTERFLY1(x,y) \
do {\
    const int bf_a= (x);\
    const int bf_b= (y);\
    (x)= bf_a+bf_b;\
    (y)= bf_a-bf_b;\
} while (0)

/** |x+y| + |x-y|, i.e. a final butterfly stage folded into the summation. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1457ab52 2063
/**
 * Sum of absolute 8x8 Hadamard-transformed differences between src and dst.
 * The difference block is transformed with three butterfly stages per row,
 * then per column; the last column stage is folded into the summation via
 * BUTTERFLYA.
 *
 * @param s unused
 * @param h must be 8 (asserted)
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h)
{
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal pass over each row of the difference block */
    for (i = 0; i < 8; i++) {
        int *row          = temp + 8*i;
        const uint8_t *sp = src + stride*i;
        const uint8_t *dp = dst + stride*i;

        //FIXME try pointer walks
        BUTTERFLY2(row[0], row[1], sp[0]-dp[0], sp[1]-dp[1]);
        BUTTERFLY2(row[2], row[3], sp[2]-dp[2], sp[3]-dp[3]);
        BUTTERFLY2(row[4], row[5], sp[4]-dp[4], sp[5]-dp[5]);
        BUTTERFLY2(row[6], row[7], sp[6]-dp[6], sp[7]-dp[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass over each column, accumulating absolute values */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }
    return sum;
}
2108
/**
 * Sum of absolute 8x8 Hadamard transform coefficients of src itself, with
 * the magnitude of the first (mean) coefficient removed from the total.
 *
 * @param s     unused
 * @param dummy unused
 * @param h     must be 8 (asserted)
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h)
{
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal pass over each row of the source block */
    for (i = 0; i < 8; i++) {
        int *row          = temp + 8*i;
        const uint8_t *sp = src + stride*i;

        //FIXME try pointer walks
        BUTTERFLY2(row[0], row[1], sp[0], sp[1]);
        BUTTERFLY2(row[2], row[3], sp[2], sp[3]);
        BUTTERFLY2(row[4], row[5], sp[4], sp[5]);
        BUTTERFLY2(row[6], row[7], sp[6], sp[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass over each column, accumulating absolute values */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
2156
bb198e19 2157static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2158 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2159 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
115329f1 2160
bb198e19 2161 assert(h==8);
1457ab52
MN
2162
2163 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 2164 s->dsp.fdct(temp);
1edbfe19 2165 return s->dsp.sum_abs_dctelem(temp);
1457ab52
MN
2166}
2167
#if CONFIG_GPL
/**
 * One 8-point integer transform pass. Inputs are read through SRC(n) and
 * results are emitted through DST(n, value); both must be defined by the
 * user of the macro. All inputs are read before any output is emitted.
 */
#define DCT8_1D {\
    const int sum07 = SRC(0) + SRC(7);\
    const int sum16 = SRC(1) + SRC(6);\
    const int sum25 = SRC(2) + SRC(5);\
    const int sum34 = SRC(3) + SRC(4);\
    const int dif07 = SRC(0) - SRC(7);\
    const int dif16 = SRC(1) - SRC(6);\
    const int dif25 = SRC(2) - SRC(5);\
    const int dif34 = SRC(3) - SRC(4);\
    const int e0 = sum07 + sum34;\
    const int e1 = sum16 + sum25;\
    const int e2 = sum07 - sum34;\
    const int e3 = sum16 - sum25;\
    const int o4 = dif16 + dif25 + (dif07 + (dif07>>1));\
    const int o5 = dif07 - dif34 - (dif25 + (dif25>>1));\
    const int o6 = dif07 + dif34 - (dif16 + (dif16>>1));\
    const int o7 = dif16 - dif25 + (dif34 + (dif34>>1));\
    DST(0,  e0 + e1     ) ;\
    DST(1,  o4 + (o7>>2)) ;\
    DST(2,  e2 + (e3>>1)) ;\
    DST(3,  o5 + (o6>>2)) ;\
    DST(4,  e0 - e1     ) ;\
    DST(5,  o6 - (o5>>2)) ;\
    DST(6, (e2>>1) - e3 ) ;\
    DST(7, (o4>>2) - o7 ) ;\
}

/**
 * Sum of absolute coefficients of the DCT8_1D transform applied to the 8x8
 * difference src1 - src2: rows are transformed in place, then the column
 * pass accumulates the absolute value of each output directly.
 *
 * @param c MpegEncContext whose dsp.diff_pixels is used
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
{
    MpegEncContext *const enc = (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int sum = 0;
    int i;

    enc->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* row pass, results written back into dct */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    /* column pass, results only contribute to the sum */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
2220
0fd6aea1
MN
2221static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2222 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2223 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
0fd6aea1 2224 int sum=0, i;
115329f1 2225
0fd6aea1
MN
2226 assert(h==8);
2227
2228 s->dsp.diff_pixels(temp, src1, src2, stride);
2229 s->dsp.fdct(temp);
2230
2231 for(i=0; i<64; i++)
c26abfa5 2232 sum= FFMAX(sum, FFABS(temp[i]));
115329f1 2233
0fd6aea1
MN
2234 return sum;
2235}
2236
bb198e19 2237static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2238 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2239 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2480c390 2240 DCTELEM * const bak = temp+64;
1457ab52
MN
2241 int sum=0, i;
2242
bb198e19 2243 assert(h==8);
1457ab52 2244 s->mb_intra=0;
115329f1 2245
1457ab52 2246 s->dsp.diff_pixels(temp, src1, src2, stride);
115329f1 2247
1457ab52 2248 memcpy(bak, temp, 64*sizeof(DCTELEM));
115329f1 2249
67725183 2250 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
d50635cd 2251 s->dct_unquantize_inter(s, temp, 0, s->qscale);
59e6f60a 2252 ff_simple_idct(temp); //FIXME
115329f1 2253
1457ab52
MN
2254 for(i=0; i<64; i++)
2255 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
115329f1 2256
1457ab52
MN
2257 return sum;
2258}
2259
bb198e19 2260static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2261 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2262 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227
MR
2263 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2264 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2265 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
e6dba5df 2266 int i, last, run, bits, level, distortion, start_i;
3a87ac94
MN
2267 const int esc_length= s->ac_esc_length;
2268 uint8_t * length;
2269 uint8_t * last_length;
115329f1 2270
bb198e19
MN
2271 assert(h==8);
2272
90d43b52
MR
2273 copy_block8(lsrc1, src1, 8, stride, 8);
2274 copy_block8(lsrc2, src2, 8, stride, 8);
3a87ac94 2275
90d43b52 2276 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
67725183
MN
2277
2278 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2279
2280 bits=0;
115329f1 2281
3a87ac94 2282 if (s->mb_intra) {
115329f1 2283 start_i = 1;
3a87ac94
MN
2284 length = s->intra_ac_vlc_length;
2285 last_length= s->intra_ac_vlc_last_length;
67725183 2286 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2287 } else {
2288 start_i = 0;
2289 length = s->inter_ac_vlc_length;
2290 last_length= s->inter_ac_vlc_last_length;
2291 }
115329f1 2292
67725183 2293 if(last>=start_i){
3a87ac94
MN
2294 run=0;
2295 for(i=start_i; i<last; i++){
2296 int j= scantable[i];
2297 level= temp[j];
115329f1 2298
3a87ac94
MN
2299 if(level){
2300 level+=64;
2301 if((level&(~127)) == 0){
2302 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2303 }else
2304 bits+= esc_length;
2305 run=0;
2306 }else
2307 run++;
2308 }
2309 i= scantable[last];
115329f1 2310
3a87ac94 2311 level= temp[i] + 64;
1d0eab1d
MN
2312
2313 assert(level - 64);
115329f1 2314
3a87ac94
MN
2315 if((level&(~127)) == 0){
2316 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2317 }else
2318 bits+= esc_length;
115329f1 2319
67725183
MN
2320 }
2321
2322 if(last>=0){
d50635cd
MN
2323 if(s->mb_intra)
2324 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2325 else
2326 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3a87ac94 2327 }
115329f1 2328
90d43b52 2329 s->dsp.idct_add(lsrc2, 8, temp);
115329f1 2330
90d43b52 2331 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
3a87ac94 2332
e6dba5df 2333 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
2334}
2335
bb198e19 2336static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2337 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2338 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227 2339 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3a87ac94
MN
2340 int i, last, run, bits, level, start_i;
2341 const int esc_length= s->ac_esc_length;
2342 uint8_t * length;
2343 uint8_t * last_length;
bb198e19
MN
2344
2345 assert(h==8);
115329f1 2346
67725183 2347 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 2348
67725183
MN
2349 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2350
2351 bits=0;
115329f1 2352
3a87ac94 2353 if (s->mb_intra) {
115329f1 2354 start_i = 1;
3a87ac94
MN
2355 length = s->intra_ac_vlc_length;
2356 last_length= s->intra_ac_vlc_last_length;
67725183 2357 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2358 } else {
2359 start_i = 0;
2360 length = s->inter_ac_vlc_length;
2361 last_length= s->inter_ac_vlc_last_length;
2362 }
115329f1 2363
67725183 2364 if(last>=start_i){
3a87ac94
MN
2365 run=0;
2366 for(i=start_i; i<last; i++){
2367 int j= scantable[i];
2368 level= temp[j];
115329f1 2369
3a87ac94
MN
2370 if(level){
2371 level+=64;
2372 if((level&(~127)) == 0){
2373 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2374 }else
2375 bits+= esc_length;
2376 run=0;
2377 }else
2378 run++;
2379 }
2380 i= scantable[last];
115329f1 2381
67725183 2382 level= temp[i] + 64;
115329f1 2383
67725183 2384 assert(level - 64);
115329f1 2385
3a87ac94
MN
2386 if((level&(~127)) == 0){
2387 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2388 }else
2389 bits+= esc_length;
2390 }
2391
2392 return bits;
2393}
2394
7fb7f636
RS
/**
 * Defines vsad_intra<size>_c(): the sum of absolute differences between
 * every pixel of a size-wide block and the pixel directly below it, over
 * rows 0..h-2. The c and dummy arguments are unused.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                          \
    int row, col;                                           \
                                                            \
    for (row = 1; row < h; row++) {                         \
        for (col = 0; col < size; col++)                    \
            total += FFABS(s[col] - s[col + stride]);       \
        s += stride;                                        \
    }                                                       \
                                                            \
    return total;                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
622348f9
MN
2412
/**
 * Vertical SAD of the difference signal: for a 16-wide block, sums
 * |(s1 - s2) at a pixel minus (s1 - s2) at the pixel below it| over rows
 * 0..h-2.
 *
 * @param c unused
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int delta = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += FFABS(delta);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
2427
/** Square of a; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))
/**
 * Defines vsse_intra<size>_c(): the sum of squared differences between
 * every pixel of a size-wide block and the pixel directly below it, over
 * rows 0..h-2. The c and dummy arguments are unused.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                          \
    int row, col;                                           \
                                                            \
    for (row = 1; row < h; row++) {                         \
        for (col = 0; col < size; col++)                    \
            total += SQ(s[col] - s[col + stride]);          \
        s += stride;                                        \
    }                                                       \
                                                            \
    return total;                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
622348f9
MN
2446
/**
 * Vertical SSE of the difference signal: for a 16-wide block, sums the
 * square of ((s1 - s2) at a pixel minus (s1 - s2) at the pixel below it)
 * over rows 0..h-2.
 *
 * @param c unused
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int delta = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += SQ(delta);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
2461
a00177a9
MR
/**
 * Sum of squared differences between an int8_t vector and an int16_t
 * vector of the same length.
 *
 * @param size number of elements in each vector
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int k;

    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        total += d * d;
    }
    return total;
}
2470
9fbd14ac
DB
/* Instantiate the 16-wide comparison functions (second macro argument) from
 * the 8x8 functions defined above (first argument). WRAPPER8_16_SQ is
 * defined outside this file section; presumably it applies the 8x8 function
 * to each 8x8 part of the larger block and sums the results -- TODO confirm.
 * The dct264 variant only exists when CONFIG_GPL is enabled. */
2471WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2472WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2473WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
b250f9c6 2474#if CONFIG_GPL
9fbd14ac 2475WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
60900991 2476#endif
9fbd14ac
DB
2477WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2478WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2479WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2480WRAPPER8_16_SQ(bit8x8_c, bit16_c)
1457ab52 2481
/**
 * Element-wise product: dst[k] = src0[k] * src1[k] for k in [0, len).
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k];
        k++;
    }
}
2487
/**
 * Element-wise product with the second operand read backwards:
 * dst[k] = src0[k] * src1[len - 1 - k] for k in [0, len).
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
{
    int fwd, rev;

    for (fwd = 0, rev = len - 1; fwd < len; fwd++, rev--)
        dst[fwd] = src0[fwd] * src1[rev];
}
2494
/**
 * Element-wise multiply-add: dst[k] = src0[k] * src1[k] + src2[k]
 * for k in [0, len).
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
2500
80ba1ddb
JR
/**
 * Windowed overlap of two len-element inputs into a 2*len-element output.
 * For k in [0, len), with m = 2*len - 1 - k:
 *   dst[k] = src0[k]*win[m] - src1[len-1-k]*win[k]
 *   dst[m] = src0[k]*win[k] + src1[len-1-k]*win[m]
 *
 * @param dst output, 2*len elements
 * @param win window, 2*len elements
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int mirror = 2 * len - 1 - k;
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        const float wi = win[k];
        const float wj = win[mirror];

        dst[k]      = s0*wj - s1*wi;
        dst[mirror] = s0*wi + s1*wj;
    }
}
2517
53b57211
MR
/**
 * Scales a vector: dst[k] = src[k] * mul for k in [0, len).
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
2525
/**
 * Multiplies src by a sequence of 2-element vectors and a scalar:
 * each consecutive pair of src elements is multiplied by the two floats of
 * the next sv entry and by mul. len is consumed in steps of 2.
 */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int k;

    for (k = 0; k < len; k += 2) {
        const float *vec = sv[k >> 1];

        dst[k    ] = src[k    ] * vec[0] * mul;
        dst[k + 1] = src[k + 1] * vec[1] * mul;
    }
}
2535
/**
 * Multiplies src by a sequence of 4-element vectors and a scalar:
 * each consecutive group of four src elements is multiplied by the four
 * floats of the next sv entry and by mul. len is consumed in steps of 4.
 */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int k;

    for (k = 0; k < len; k += 4) {
        const float *vec = sv[k >> 2];

        dst[k    ] = src[k    ] * vec[0] * mul;
        dst[k + 1] = src[k + 1] * vec[1] * mul;
        dst[k + 2] = src[k + 2] * vec[2] * mul;
        dst[k + 3] = src[k + 3] * vec[3] * mul;
    }
}
2547
/**
 * Expands a sequence of 2-element vectors scaled by mul: each consecutive
 * pair of dst elements receives the two floats of the next sv entry times
 * mul. len is consumed in steps of 2.
 */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int k;

    for (k = 0; k < len; k += 2) {
        const float *vec = sv[k >> 1];

        dst[k    ] = vec[0] * mul;
        dst[k + 1] = vec[1] * mul;
    }
}
2557
/**
 * Expands a sequence of 4-element vectors scaled by mul: each consecutive
 * group of four dst elements receives the four floats of the next sv entry
 * times mul. len is consumed in steps of 4.
 */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int k;

    for (k = 0; k < len; k += 4) {
        const float *vec = sv[k >> 2];

        dst[k    ] = vec[0] * mul;
        dst[k + 1] = vec[1] * mul;
        dst[k + 2] = vec[2] * mul;
        dst[k + 3] = vec[3] * mul;
    }
}
2569
/**
 * In-place butterfly of two vectors: for each k in [0, len),
 * v1[k] becomes v1[k] + v2[k] and v2[k] becomes v1[k] - v2[k]
 * (both computed from the old values).
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k = 0;

    while (k < len) {
        const float diff = v1[k] - v2[k];

        v1[k] += v2[k];
        v2[k]  = diff;
        k++;
    }
}
2580
/**
 * Dot product of two float vectors, accumulated in single precision in
 * index order.
 *
 * @return sum of v1[k] * v2[k] for k in [0, len); 0 when len <= 0
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}
2591
0a68cd87
VS
/**
 * Clips one 32-bit pattern using unsigned integer comparisons only.
 *
 * @param a        value to clip
 * @param mini     returned when a > mini
 * @param maxi     returned when a with bit 31 toggled is > maxisign
 * @param maxisign maxi with bit 31 toggled
 * @return mini, maxi or a unchanged, tested in that order
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    const uint32_t flipped = a ^ (1U<<31);

    if (a > mini)
        return mini;
    if (flipped > maxisign)
        return maxi;
    return a;
}
2600
/**
 * Clips a float vector to [*min, *max] by operating on the raw 32-bit
 * patterns of the floats through clipf_c_one(). The only caller in this
 * file uses it when min < 0 and max > 0.
 *
 * Elements are processed in groups of 8, so a len that is not a multiple
 * of 8 is effectively rounded up.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
{
    const uint32_t lo_bits    = *(uint32_t*)min;
    const uint32_t hi_bits    = *(uint32_t*)max;
    const uint32_t hi_flipped = hi_bits ^ (1U<<31);
    const uint32_t *in        = (const uint32_t*)src;
    uint32_t *out             = (uint32_t*)dst;
    int base, k;

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            out[base + k] = clipf_c_one(in[base + k], lo_bits, hi_bits, hi_flipped);
}
/**
 * Clips every element of src to [min, max] and stores the result in dst.
 * When the bounds have opposite signs the integer-compare variant is used,
 * otherwise av_clipf() is applied per element.
 *
 * Elements are processed in groups of 8, so a len that is not a multiple
 * of 8 is effectively rounded up.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
2636
/**
 * Dot product of two int16_t vectors in which every individual product is
 * shifted right by shift before being accumulated.
 *
 * @param order number of elements
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;

    for (; order; order--) {
        const int product = *v1++ * *v2++;
        acc += product >> shift;
    }

    return acc;
}
2646
/**
 * Computes the dot product of v1 and v2 while updating v1 in place:
 * each v1 element contributes its old value to the product and is then
 * increased by mul times the matching v3 element.
 *
 * @param order number of elements
 * @return dot product of the original v1 with v2
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;

    for (; order; order--) {
        acc += *v1 * *v2++;
        *v1 += mul * *v3++;
        v1++;
    }
    return acc;
}
2656
e6e98234
JR
/**
 * Applies a symmetric window to int16_t samples: only the first len/2
 * window coefficients are read; coefficient k scales both sample k and
 * sample len-1-k. Products are rounded (+ 1<<14) and shifted right by 15.
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int k;

    for (k = 0; k < half; k++) {
        const unsigned int mirror = len - k - 1;
        const int16_t coeff = window[k];

        output[k]      = (MUL16(input[k],      coeff) + (1 << 14)) >> 15;
        output[mirror] = (MUL16(input[mirror], coeff) + (1 << 14)) >> 15;
    }
}
2669
9abc7e0f
MN
/* Fixed-point multipliers used by wmv2_idct_row() and wmv2_idct_col():
 * Wk = round(2048 * sqrt(2) * cos(k * pi / 16)) for k = 1..7.
 * W0 has the same value as W4 (2048). */
2670#define W0 2048
2671#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
2672#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
2673#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
2674#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
2675#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
2676#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
2677#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
2678
2679static void wmv2_idct_row(short * b)
2680{
2681 int s1,s2;
2682 int a0,a1,a2,a3,a4,a5,a6,a7;
2683 /*step 1*/
2684 a1 = W1*b[1]+W7*b[7];
2685 a7 = W7*b[1]-W1*b[7];
2686 a5 = W5*b[5]+W3*b[3];
2687 a3 = W3*b[5]-W5*b[3];
2688 a2 = W2*b[2]+W6*b[6];
2689 a6 = W6*b[2]-W2*b[6];
2690 a0 = W0*b[0]+W0*b[4];
2691 a4 = W0*b[0]-W0*b[4];
2692 /*step 2*/
2693 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2694 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2695 /*step 3*/
2696 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2697 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2698 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2699 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2700 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2701 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2702 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2703 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2704}
2705static void wmv2_idct_col(short * b)
2706{
2707 int s1,s2;
2708 int a0,a1,a2,a3,a4,a5,a6,a7;
2709 /*step 1, with extended precision*/
2710 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2711 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2712 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2713 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2714 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2715 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2716 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2717 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2718 /*step 2*/
2719 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2720 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2721 /*step 3*/
2722 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2723 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2724 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2725 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2726
2727 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2728 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2729 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2730 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
2731}
/**
 * In-place WMV2 IDCT of a 64-coefficient block: the row transform is run on
 * each group of 8 consecutive coefficients, then the column transform is run
 * on each of the 8 columns.
 */
void ff_wmv2_idct_c(short * block){
    int k;

    for (k = 0; k < 8; k++)
        wmv2_idct_row(block + 8 * k);

    for (k = 0; k < 8; k++)
        wmv2_idct_col(block + k);
}
b0368839
MN
/* XXX: these functions should be removed as soon as all IDCTs have been
   converted */
9abc7e0f
MN
2744static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2745{
2746 ff_wmv2_idct_c(block);
484a337c 2747 ff_put_pixels_clamped_c(block, dest, line_size);
9abc7e0f
MN
2748}
2749static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2750{
2751 ff_wmv2_idct_c(block);
484a337c 2752 ff_add_pixels_clamped_c(block, dest, line_size);
9abc7e0f 2753}
b0368839
MN
2754static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2755{
2756 j_rev_dct (block);
484a337c 2757 ff_put_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2758}
2759static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2760{
2761 j_rev_dct (block);
484a337c 2762 ff_add_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2763}
2764
178fcca8
MN
2765static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
2766{
2767 j_rev_dct4 (block);
2768 put_pixels_clamped4_c(block, dest, line_size);
2769}
2770static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
2771{
2772 j_rev_dct4 (block);
2773 add_pixels_clamped4_c(block, dest, line_size);
2774}
2775
9ca358b9
MN
2776static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
2777{
2778 j_rev_dct2 (block);
2779 put_pixels_clamped2_c(block, dest, line_size);
2780}
2781static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
2782{
2783 j_rev_dct2 (block);
2784 add_pixels_clamped2_c(block, dest, line_size);
2785}
2786
1aa8c57b
MN
2787static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
2788{
55fde95e 2789 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1aa8c57b
MN
2790
2791 dest[0] = cm[(block[0] + 4)>>3];
2792}
2793static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
2794{
55fde95e 2795 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1aa8c57b
MN
2796
2797 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
2798}
2799
d111e41f 2800static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
513fbd8e 2801
59cf08ce 2802/* init static data */
0752cd39 2803av_cold void dsputil_static_init(void)
e0eac44e 2804{
d2975f8d 2805 int i;
e0eac44e 2806
55fde95e 2807 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
59cf08ce 2808 for(i=0;i<MAX_NEG_CROP;i++) {
55fde95e
MR
2809 ff_cropTbl[i] = 0;
2810 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
59cf08ce 2811 }
115329f1 2812
59cf08ce 2813 for(i=0;i<512;i++) {
1d503957 2814 ff_squareTbl[i] = (i - 256) * (i - 256);
59cf08ce 2815 }
115329f1 2816
486497e0 2817 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
59cf08ce 2818}
92ddb692 2819
6dc7d5da
MN
2820int ff_check_alignment(void){
2821 static int did_fail=0;
84dc2d8a 2822 DECLARE_ALIGNED(16, int, aligned);
6dc7d5da 2823
d4efacff 2824 if((intptr_t)&aligned & 15){
6dc7d5da 2825 if(!did_fail){
b250f9c6 2826#if HAVE_MMX || HAVE_ALTIVEC
6dc7d5da 2827 av_log(NULL, AV_LOG_ERROR,
c1173617
MR
2828 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2829 "and may be very slow or crash. This is not a bug in libavcodec,\n"
5e4c7ca2 2830 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
6001dad6 2831 "Do not report crashes to Libav developers.\n");
6dc7d5da
MN
2832#endif
2833 did_fail=1;
2834 }
2835 return -1;
2836 }
2837 return 0;
2838}
92ddb692 2839
0752cd39 2840av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
59cf08ce
FB
2841{
2842 int i;
de6d9b64 2843
6dc7d5da
MN
2844 ff_check_alignment();
2845
b250f9c6 2846#if CONFIG_ENCODERS
10acc479 2847 if(avctx->dct_algo==FF_DCT_FASTINT) {
b0368839 2848 c->fdct = fdct_ifast;
bb270c08 2849 c->fdct248 = fdct_ifast248;
115329f1 2850 }
10acc479 2851 else if(avctx->dct_algo==FF_DCT_FAAN) {
65e4c8c9 2852 c->fdct = ff_faandct;
bb270c08 2853 c->fdct248 = ff_faandct248;
115329f1 2854 }
10acc479 2855 else {
b0368839 2856 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
bb270c08 2857 c->fdct248 = ff_fdct248_islow;
10acc479 2858 }
b0368839
MN
2859#endif //CONFIG_ENCODERS
2860
178fcca8 2861 if(avctx->lowres==1){
49fb20cb 2862 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
0fa8158d
MN
2863 c->idct_put= ff_jref_idct4_put;
2864 c->idct_add= ff_jref_idct4_add;
2865 }else{
2866 c->idct_put= ff_h264_lowres_idct_put_c;
2867 c->idct_add= ff_h264_lowres_idct_add_c;
2868 }
178fcca8 2869 c->idct = j_rev_dct4;
b0368839 2870 c->idct_permutation_type= FF_NO_IDCT_PERM;
9ca358b9
MN
2871 }else if(avctx->lowres==2){
2872 c->idct_put= ff_jref_idct2_put;
2873 c->idct_add= ff_jref_idct2_add;
2874 c->idct = j_rev_dct2;
2875 c->idct_permutation_type= FF_NO_IDCT_PERM;
1aa8c57b
MN
2876 }else if(avctx->lowres==3){
2877 c->idct_put= ff_jref_idct1_put;
2878 c->idct_add= ff_jref_idct1_add;
2879 c->idct = j_rev_dct1;
2880 c->idct_permutation_type= FF_NO_IDCT_PERM;
178fcca8
MN
2881 }else{
2882 if(avctx->idct_algo==FF_IDCT_INT){
2883 c->idct_put= ff_jref_idct_put;
2884 c->idct_add= ff_jref_idct_add;
2885 c->idct = j_rev_dct;
2886 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
99e5a9d1 2887 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
9b5dc867 2888 avctx->idct_algo==FF_IDCT_VP3){
8b6103da
MN
2889 c->idct_put= ff_vp3_idct_put_c;
2890 c->idct_add= ff_vp3_idct_add_c;
2891 c->idct = ff_vp3_idct_c;
2892 c->idct_permutation_type= FF_NO_IDCT_PERM;
9abc7e0f
MN
2893 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2894 c->idct_put= ff_wmv2_idct_put_c;
2895 c->idct_add= ff_wmv2_idct_add_c;
2896 c->idct = ff_wmv2_idct_c;
2897 c->idct_permutation_type= FF_NO_IDCT_PERM;
6f08c541
MN
2898 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2899 c->idct_put= ff_faanidct_put;
2900 c->idct_add= ff_faanidct_add;
2901 c->idct = ff_faanidct;
2902 c->idct_permutation_type= FF_NO_IDCT_PERM;
49fb20cb 2903 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
28245435
PR
2904 c->idct_put= ff_ea_idct_put_c;
2905 c->idct_permutation_type= FF_NO_IDCT_PERM;
342c7dfd
KS
2906 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
2907 c->idct = ff_bink_idct_c;
2908 c->idct_add = ff_bink_idct_add_c;
2909 c->idct_put = ff_bink_idct_put_c;
2910 c->idct_permutation_type = FF_NO_IDCT_PERM;
178fcca8 2911 }else{ //accurate/default
59e6f60a
AJ
2912 c->idct_put= ff_simple_idct_put;
2913 c->idct_add= ff_simple_idct_add;
2914 c->idct = ff_simple_idct;
178fcca8
MN
2915 c->idct_permutation_type= FF_NO_IDCT_PERM;
2916 }
b0368839
MN
2917 }
2918
eb4b3dd3
ZK
2919 c->get_pixels = get_pixels_c;
2920 c->diff_pixels = diff_pixels_c;
484a337c
RB
2921 c->put_pixels_clamped = ff_put_pixels_clamped_c;
2922 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
342c7dfd 2923 c->put_pixels_nonclamped = put_pixels_nonclamped_c;
484a337c 2924 c->add_pixels_clamped = ff_add_pixels_clamped_c;
36940eca
LM
2925 c->add_pixels8 = add_pixels8_c;
2926 c->add_pixels4 = add_pixels4_c;
1edbfe19 2927 c->sum_abs_dctelem = sum_abs_dctelem_c;
2e279598 2928 c->emulated_edge_mc = ff_emulated_edge_mc;
eb4b3dd3 2929 c->gmc1 = gmc1_c;
703c8195 2930 c->gmc = ff_gmc_c;
5fecfb7d 2931 c->clear_block = clear_block_c;
eb4b3dd3
ZK
2932 c->clear_blocks = clear_blocks_c;
2933 c->pix_sum = pix_sum_c;
2934 c->pix_norm1 = pix_norm1_c;
2935
342c7dfd
KS
2936 c->fill_block_tab[0] = fill_block16_c;
2937 c->fill_block_tab[1] = fill_block8_c;
2938 c->scale_block = scale_block_c;
2939
45553457 2940 /* TODO [0] 16 [1] 8 */
bb198e19
MN
2941 c->pix_abs[0][0] = pix_abs16_c;
2942 c->pix_abs[0][1] = pix_abs16_x2_c;
2943 c->pix_abs[0][2] = pix_abs16_y2_c;
2944 c->pix_abs[0][3] = pix_abs16_xy2_c;
2945 c->pix_abs[1][0] = pix_abs8_c;
2946 c->pix_abs[1][1] = pix_abs8_x2_c;
2947 c->pix_abs[1][2] = pix_abs8_y2_c;
2948 c->pix_abs[1][3] = pix_abs8_xy2_c;
eb4b3dd3 2949
45553457
ZK
2950#define dspfunc(PFX, IDX, NUM) \
2951 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
2952 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
2953 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
2954 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
2955
2956 dspfunc(put, 0, 16);
2957 dspfunc(put_no_rnd, 0, 16);
2958 dspfunc(put, 1, 8);
2959 dspfunc(put_no_rnd, 1, 8);
669ac79c
MN
2960 dspfunc(put, 2, 4);
2961 dspfunc(put, 3, 2);
45553457
ZK
2962
2963 dspfunc(avg, 0, 16);
2964 dspfunc(avg_no_rnd, 0, 16);
2965 dspfunc(avg, 1, 8);
2966 dspfunc(avg_no_rnd, 1, 8);
da3b9756
MM
2967 dspfunc(avg, 2, 4);
2968 dspfunc(avg, 3, 2);
45553457
ZK
2969#undef dspfunc
2970
c0a0170c
MN
2971 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
2972 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
2973
669ac79c
MN
2974 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2975 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2976 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2977 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2978 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2979 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2980 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2981 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2982 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2983
da3b9756
MM
2984 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2985 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2986 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2987 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2988 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2989 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2990 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2991 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2992 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2993
45553457
ZK
2994#define dspfunc(PFX, IDX, NUM) \
2995 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2996 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2997 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2998 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2999 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
3000 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
3001 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
3002 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
3003 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
3004 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
3005 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
3006 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
3007 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
3008 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
3009 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
3010 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
3011
3012 dspfunc(put_qpel, 0, 16);
3013 dspfunc(put_no_rnd_qpel, 0, 16);
3014
3015 dspfunc(avg_qpel, 0, 16);
3016 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
3017
3018 dspfunc(put_qpel, 1, 8);
3019 dspfunc(put_no_rnd_qpel, 1, 8);
3020
3021 dspfunc(avg_qpel, 1, 8);
3022 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265
MN
3023
3024 dspfunc(put_h264_qpel, 0, 16);
3025 dspfunc(put_h264_qpel, 1, 8);
3026 dspfunc(put_h264_qpel, 2, 4);
80e44bc3 3027 dspfunc(put_h264_qpel, 3, 2);
0da71265
MN
3028 dspfunc(avg_h264_qpel, 0, 16);
3029 dspfunc(avg_h264_qpel, 1, 8);
3030 dspfunc(avg_h264_qpel, 2, 4);
3031
45553457 3032#undef dspfunc
0da71265
MN
3033 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
3034 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
3035 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
3036 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
3037 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
3038 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
c9a2ebc4 3039
5a6a9e78
AJ
3040 c->draw_edges = draw_edges_c;
3041
bf4f19dc
RP
3042#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
3043 ff_mlp_init(c, avctx);
3044#endif
9be6f0d2 3045#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
9abc7e0f
MN
3046 ff_intrax8dsp_init(c,avctx);
3047#endif
b250f9c6 3048#if CONFIG_RV30_DECODER
6beb8b26
KS
3049 ff_rv30dsp_init(c,avctx);
3050#endif
b250f9c6 3051#if CONFIG_RV40_DECODER
2d8a0815
KS
3052 ff_rv40dsp_init(c,avctx);
3053 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
3054 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
3055 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
3056 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
3057#endif
b482e2d1 3058
3d1b1caa 3059 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
1457ab52
MN
3060 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
3061 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
3062 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
3063 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
3064 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
3065 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3066 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
115329f1 3067
bb198e19
MN
3068#define SET_CMP_FUNC(name) \
3069 c->name[0]= name ## 16_c;\
3070 c->name[1]= name ## 8x8_c;
115329f1 3071
bb198e19 3072 SET_CMP_FUNC(hadamard8_diff)
622348f9 3073 c->hadamard8_diff[4]= hadamard8_intra16_c;
7fb7f636 3074 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
bb198e19 3075 SET_CMP_FUNC(dct_sad)
0fd6aea1 3076 SET_CMP_FUNC(dct_max)
b250f9c6 3077#if CONFIG_GPL
27c61ac5 3078 SET_CMP_FUNC(dct264_sad)
60900991 3079#endif
bb198e19
MN
3080 c->sad[0]= pix_abs16_c;
3081 c->sad[1]= pix_abs8_c;
3082 c->sse[0]= sse16_c;
3083 c->sse[1]= sse8_c;
26efc54e 3084 c->sse[2]= sse4_c;
bb198e19
MN
3085 SET_CMP_FUNC(quant_psnr)
3086 SET_CMP_FUNC(rd)
3087 SET_CMP_FUNC(bit)
622348f9
MN
3088 c->vsad[0]= vsad16_c;
3089 c->vsad[4]= vsad_intra16_c;
7fb7f636 3090 c->vsad[5]= vsad_intra8_c;
622348f9
MN
3091 c->vsse[0]= vsse16_c;
3092 c->vsse[4]= vsse_intra16_c;
7fb7f636 3093 c->vsse[5]= vsse_intra8_c;
e6a2ac34
MN
3094 c->nsse[0]= nsse16_c;
3095 c->nsse[1]= nsse8_c;
05aec7bb
MR
3096#if CONFIG_DWT
3097 ff_dsputil_init_dwt(c);
3a6fc8fa 3098#endif
26efc54e 3099
59006372
LM
3100 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
3101
11f18faf 3102 c->add_bytes= add_bytes_c;
4a9ca0a2 3103 c->add_bytes_l2= add_bytes_l2_c;
11f18faf 3104 c->diff_bytes= diff_bytes_c;
3daa434a 3105 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
84705403 3106 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
73c6f598
NC
3107 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
3108 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
3d2e8cce 3109 c->bswap_buf= bswap_buf;
381d37fd 3110 c->bswap16_buf = bswap16_buf;
b250f9c6 3111#if CONFIG_PNG_DECODER
4a9ca0a2
LM
3112 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
3113#endif
42251a2a 3114
4052cbf1 3115 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
674eeb5f
AJ
3116 c->h263_h_loop_filter= h263_h_loop_filter_c;
3117 c->h263_v_loop_filter= h263_v_loop_filter_c;
eb75a698 3118 }
115329f1 3119
99e5a9d1 3120 if (CONFIG_VP3_DECODER) {
9971331d
DC
3121 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
3122 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
eb6a6cd7 3123 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
9971331d
DC
3124 }
3125
fdbbf2e0 3126 c->h261_loop_filter= h261_loop_filter_c;
115329f1 3127
364a1797
MN
3128 c->try_8x8basis= try_8x8basis_c;
3129 c->add_8x8basis= add_8x8basis_c;
11f18faf 3130
b250f9c6 3131#if CONFIG_VORBIS_DECODER
2dac4acf
LM
3132 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
3133#endif
b250f9c6 3134#if CONFIG_AC3_DECODER
ac2e5564
LM
3135 c->ac3_downmix = ff_ac3_downmix_c;
3136#endif
eb4825b5
LM
3137 c->vector_fmul = vector_fmul_c;
3138 c->vector_fmul_reverse = vector_fmul_reverse_c;
952e8721 3139 c->vector_fmul_add = vector_fmul_add_c;
80ba1ddb 3140 c->vector_fmul_window = vector_fmul_window_c;
0a68cd87 3141 c->vector_clipf = vector_clipf_c;
88c0536a 3142 c->scalarproduct_int16 = scalarproduct_int16_c;
b1159ad9 3143 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
e6e98234 3144 c->apply_window_int16 = apply_window_int16_c;
53b57211
MR
3145 c->scalarproduct_float = scalarproduct_float_c;
3146 c->butterflies_float = butterflies_float_c;
3147 c->vector_fmul_scalar = vector_fmul_scalar_c;
3148
3149 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
3150 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
3151
3152 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
3153 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
2dac4acf 3154
9686abb8 3155 c->shrink[0]= av_image_copy_plane;
54009d42
MN
3156 c->shrink[1]= ff_shrink22;
3157 c->shrink[2]= ff_shrink44;
3158 c->shrink[3]= ff_shrink88;
3159
513fbd8e
LM
3160 c->prefetch= just_return;
3161
2833fc46
LM
3162 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3163 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3164
49fb20cb
AJ
3165 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
3166 if (ARCH_ARM) dsputil_init_arm (c, avctx);
3167 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
3168 if (HAVE_VIS) dsputil_init_vis (c, avctx);
3169 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
3170 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
3171 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
3172 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
3173 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
43f1708f 3174
2833fc46
LM
3175 for(i=0; i<64; i++){
3176 if(!c->put_2tap_qpel_pixels_tab[0][i])
3177 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
3178 if(!c->avg_2tap_qpel_pixels_tab[0][i])
3179 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3180 }
3181
eca9e403
MR
3182 c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
3183 c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
3184 c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
3185 c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
3186
3187 c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
3188 c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
3189 c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
3190 c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
3191
b0368839
MN
3192 switch(c->idct_permutation_type){
3193 case FF_NO_IDCT_PERM:
3194 for(i=0; i<64; i++)
3195 c->idct_permutation[i]= i;
3196 break;
3197 case FF_LIBMPEG2_IDCT_PERM:
3198 for(i=0; i<64; i++)
3199 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
3200 break;
3201 case FF_SIMPLE_IDCT_PERM:
3202 for(i=0; i<64; i++)
3203 c->idct_permutation[i]= simple_mmx_permutation[i];
3204 break;
3205 case FF_TRANSPOSE_IDCT_PERM:
3206 for(i=0; i<64; i++)
3207 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
3208 break;
5773a746
MN
3209 case FF_PARTTRANS_IDCT_PERM:
3210 for(i=0; i<64; i++)
3211 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
3212 break;
0e956ba2
AS
3213 case FF_SSE2_IDCT_PERM:
3214 for(i=0; i<64; i++)
3215 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
3216 break;
b0368839 3217 default:
9b879566 3218 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
b0368839 3219 }
57060b1e 3220}
b0368839 3221