Use LOCAL_ALIGNED in ff_check_alignment()
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
115329f1 24
/**
 * @file
 * DSP utils
 */
115329f1 29
737eb597 30#include "libavutil/imgutils.h"
de6d9b64
FB
31#include "avcodec.h"
32#include "dsputil.h"
b0368839 33#include "simple_idct.h"
65e4c8c9 34#include "faandct.h"
6f08c541 35#include "faanidct.h"
199436b9 36#include "mathops.h"
af818f7a
DB
37#include "mpegvideo.h"
38#include "config.h"
3da11804
MR
39#include "ac3dec.h"
40#include "vorbis.h"
41#include "png.h"
5596c60c 42
55fde95e 43uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
1d503957 44uint32_t ff_squareTbl[512] = {0, };
de6d9b64 45
19a0729b
OA
46#define BIT_DEPTH 9
47#include "dsputil_template.c"
48#undef BIT_DEPTH
49
50#define BIT_DEPTH 10
51#include "dsputil_template.c"
52#undef BIT_DEPTH
53
54#define BIT_DEPTH 8
325eefa2
OA
55#include "dsputil_template.c"
56
917f55cc
LM
/* Byte-replicated constants sized to unsigned long: ~0UL / 255 is
 * 0x0101...01, so the products are 0x7f7f...7f and 0x8080...80
 * (4 or 8 bytes wide depending on the width of unsigned long). */
#define pb_7f ((~0UL / 255) * 0x7f)
#define pb_80 ((~0UL / 255) * 0x80)
469bd7b1 60
/* Zigzag scan table: a permutation of 0..63 where entry i is the
 * block index (row * 8 + column) visited i-th. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,   /* scan positions  0.. 7 */
    17, 24, 32, 25, 18, 11,  4,  5,   /* scan positions  8..15 */
    12, 19, 26, 33, 40, 48, 41, 34,   /* scan positions 16..23 */
    27, 20, 13,  6,  7, 14, 21, 28,   /* scan positions 24..31 */
    35, 42, 49, 56, 57, 50, 43, 36,   /* scan positions 32..39 */
    29, 22, 15, 23, 30, 37, 44, 51,   /* scan positions 40..47 */
    58, 59, 52, 45, 38, 31, 39, 46,   /* scan positions 48..55 */
    53, 60, 61, 54, 47, 55, 62, 63    /* scan positions 56..63 */
};
71
10acc479
RS
/* Zigzag scan used with the 248 IDCT. Note: unlike the specification,
 * the two fields are interleaved in this table. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 33, 41,   /* scan positions  8..15 */
    18, 26,  3, 11,  4, 12, 19, 27,   /* scan positions 16..23 */
    34, 42, 49, 57, 50, 58, 35, 43,   /* scan positions 24..31 */
    20, 28,  5, 13,  6, 14, 21, 29,   /* scan positions 32..39 */
    36, 44, 51, 59, 52, 60, 37, 45,   /* scan positions 40..47 */
    22, 30,  7, 15, 23, 31, 38, 46,   /* scan positions 48..55 */
    53, 61, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
84
2f349de2 85/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84dc2d8a 86DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
2f349de2 87
/* Alternate horizontal scan table: a permutation of 0..63 giving, for each
 * scan position, an index into an 8x8 block. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,   /* scan positions  0.. 7 */
    10, 11,  4,  5,  6,  7, 15, 14,   /* scan positions  8..15 */
    13, 12, 19, 18, 24, 25, 32, 33,   /* scan positions 16..23 */
    26, 27, 20, 21, 22, 23, 28, 29,   /* scan positions 24..31 */
    30, 31, 34, 35, 40, 41, 48, 49,   /* scan positions 32..39 */
    42, 43, 36, 37, 38, 39, 44, 45,   /* scan positions 40..47 */
    46, 47, 50, 51, 56, 57, 58, 59,   /* scan positions 48..55 */
    52, 53, 54, 55, 60, 61, 62, 63,   /* scan positions 56..63 */
};
98
/* Alternate vertical scan table: a permutation of 0..63 giving, for each
 * scan position, an index into an 8x8 block. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 57, 49,   /* scan positions  8..15 */
    41, 33, 26, 18,  3, 11,  4, 12,   /* scan positions 16..23 */
    19, 27, 34, 42, 50, 58, 35, 43,   /* scan positions 24..31 */
    51, 59, 20, 28,  5, 13,  6, 14,   /* scan positions 32..39 */
    21, 29, 36, 44, 52, 60, 37, 45,   /* scan positions 40..47 */
    53, 61, 22, 30,  7, 15, 23, 31,   /* scan positions 48..55 */
    38, 46, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
109
b0368839
MN
/* Input coefficient permutation for the simple_idct_mmx routine
 * (a permutation of 0x00..0x3F, eight entries per row). */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09,   0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19,   0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29,   0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B,   0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B,   0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39,   0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B,   0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B,   0x33, 0x3E, 0x37, 0x3F,
};
121
0e956ba2
AS
/* Row permutation table associated with the SSE2 IDCT: interleaves
 * indices 0..3 with 4..7. */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4,
    1, 5,
    2, 6,
    3, 7,
};
123
4c79b95c
AJ
124void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
125 int i;
126 int end;
127
128 st->scantable= src_scantable;
129
130 for(i=0; i<64; i++){
131 int j;
132 j = src_scantable[i];
133 st->permutated[i] = permutation[j];
b250f9c6 134#if ARCH_PPC
4c79b95c
AJ
135 st->inverse[j] = i;
136#endif
137 }
138
139 end=-1;
140 for(i=0; i<64; i++){
141 int j;
142 j = st->permutated[i];
143 if(j>end) end=j;
144 st->raster_end[i]= end;
145 }
146}
147
/**
 * Sum all samples of a 16x16 block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return sum of the 256 sample values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
169
0c1a9eda 170static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
171{
172 int s, i, j;
1d503957 173 uint32_t *sq = ff_squareTbl + 256;
3aa102be
MN
174
175 s = 0;
176 for (i = 0; i < 16; i++) {
bb270c08 177 for (j = 0; j < 16; j += 8) {
2a006cd3 178#if 0
bb270c08
DB
179 s += sq[pix[0]];
180 s += sq[pix[1]];
181 s += sq[pix[2]];
182 s += sq[pix[3]];
183 s += sq[pix[4]];
184 s += sq[pix[5]];
185 s += sq[pix[6]];
186 s += sq[pix[7]];
2a006cd3
FL
187#else
188#if LONG_MAX > 2147483647
bb270c08
DB
189 register uint64_t x=*(uint64_t*)pix;
190 s += sq[x&0xff];
191 s += sq[(x>>8)&0xff];
192 s += sq[(x>>16)&0xff];
193 s += sq[(x>>24)&0xff];
2a006cd3
FL
194 s += sq[(x>>32)&0xff];
195 s += sq[(x>>40)&0xff];
196 s += sq[(x>>48)&0xff];
197 s += sq[(x>>56)&0xff];
198#else
bb270c08
DB
199 register uint32_t x=*(uint32_t*)pix;
200 s += sq[x&0xff];
201 s += sq[(x>>8)&0xff];
202 s += sq[(x>>16)&0xff];
203 s += sq[(x>>24)&0xff];
2a006cd3
FL
204 x=*(uint32_t*)(pix+4);
205 s += sq[x&0xff];
206 s += sq[(x>>8)&0xff];
207 s += sq[(x>>16)&0xff];
208 s += sq[(x>>24)&0xff];
209#endif
210#endif
bb270c08
DB
211 pix += 8;
212 }
213 pix += line_size - 16;
3aa102be
MN
214 }
215 return s;
216}
217
/**
 * Byte-swap an array of 32-bit words.
 *
 * @param dst destination array
 * @param src source array
 * @param w   number of 32-bit words to convert; nothing is done if w <= 0
 *
 * Elements are processed in ascending order, one av_bswap32() per word.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int pos;

    for (pos = 0; pos < w; pos++)
        dst[pos] = av_bswap32(src[pos]);
}
3aa102be 235
381d37fd
MR
/**
 * Byte-swap an array of 16-bit words.
 *
 * @param dst destination array
 * @param src source array
 * @param len number of 16-bit words to convert
 *
 * A non-positive len does nothing, matching bswap_buf(). The previous
 * `while (len--)` form kept running (and eventually overflowed the counter)
 * when handed a negative length.
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
241
26efc54e
MN
242static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
243{
244 int s, i;
1d503957 245 uint32_t *sq = ff_squareTbl + 256;
26efc54e
MN
246
247 s = 0;
248 for (i = 0; i < h; i++) {
249 s += sq[pix1[0] - pix2[0]];
250 s += sq[pix1[1] - pix2[1]];
251 s += sq[pix1[2] - pix2[2]];
252 s += sq[pix1[3] - pix2[3]];
253 pix1 += line_size;
254 pix2 += line_size;
255 }
256 return s;
257}
258
bb198e19 259static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
260{
261 int s, i;
1d503957 262 uint32_t *sq = ff_squareTbl + 256;
1457ab52
MN
263
264 s = 0;
bb198e19 265 for (i = 0; i < h; i++) {
1457ab52
MN
266 s += sq[pix1[0] - pix2[0]];
267 s += sq[pix1[1] - pix2[1]];
268 s += sq[pix1[2] - pix2[2]];
269 s += sq[pix1[3] - pix2[3]];
270 s += sq[pix1[4] - pix2[4]];
271 s += sq[pix1[5] - pix2[5]];
272 s += sq[pix1[6] - pix2[6]];
273 s += sq[pix1[7] - pix2[7]];
274 pix1 += line_size;
275 pix2 += line_size;
276 }
277 return s;
278}
279
bb198e19 280static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 281{
6b026927 282 int s, i;
1d503957 283 uint32_t *sq = ff_squareTbl + 256;
9c76bd48
BF
284
285 s = 0;
bb198e19 286 for (i = 0; i < h; i++) {
6b026927
FH
287 s += sq[pix1[ 0] - pix2[ 0]];
288 s += sq[pix1[ 1] - pix2[ 1]];
289 s += sq[pix1[ 2] - pix2[ 2]];
290 s += sq[pix1[ 3] - pix2[ 3]];
291 s += sq[pix1[ 4] - pix2[ 4]];
292 s += sq[pix1[ 5] - pix2[ 5]];
293 s += sq[pix1[ 6] - pix2[ 6]];
294 s += sq[pix1[ 7] - pix2[ 7]];
295 s += sq[pix1[ 8] - pix2[ 8]];
296 s += sq[pix1[ 9] - pix2[ 9]];
297 s += sq[pix1[10] - pix2[10]];
298 s += sq[pix1[11] - pix2[11]];
299 s += sq[pix1[12] - pix2[12]];
300 s += sq[pix1[13] - pix2[13]];
301 s += sq[pix1[14] - pix2[14]];
302 s += sq[pix1[15] - pix2[15]];
2a006cd3 303
6b026927
FH
304 pix1 += line_size;
305 pix2 += line_size;
9c76bd48
BF
306 }
307 return s;
308}
309
0c1a9eda 310static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 311{
de6d9b64
FB
312 int i;
313
314 /* read the pixels */
de6d9b64 315 for(i=0;i<8;i++) {
c13e1abd
FH
316 block[0] = pixels[0];
317 block[1] = pixels[1];
318 block[2] = pixels[2];
319 block[3] = pixels[3];
320 block[4] = pixels[4];
321 block[5] = pixels[5];
322 block[6] = pixels[6];
323 block[7] = pixels[7];
324 pixels += line_size;
325 block += 8;
de6d9b64
FB
326 }
327}
328
0c1a9eda 329static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
bb270c08 330 const uint8_t *s2, int stride){
9dbcbd92
MN
331 int i;
332
333 /* read the pixels */
9dbcbd92 334 for(i=0;i<8;i++) {
c13e1abd
FH
335 block[0] = s1[0] - s2[0];
336 block[1] = s1[1] - s2[1];
337 block[2] = s1[2] - s2[2];
338 block[3] = s1[3] - s2[3];
339 block[4] = s1[4] - s2[4];
340 block[5] = s1[5] - s2[5];
341 block[6] = s1[6] - s2[6];
342 block[7] = s1[7] - s2[7];
9dbcbd92
MN
343 s1 += stride;
344 s2 += stride;
c13e1abd 345 block += 8;
9dbcbd92
MN
346 }
347}
348
349
484a337c
RB
350void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
351 int line_size)
de6d9b64 352{
de6d9b64 353 int i;
55fde95e 354 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 355
de6d9b64 356 /* read the pixels */
de6d9b64 357 for(i=0;i<8;i++) {
c13e1abd
FH
358 pixels[0] = cm[block[0]];
359 pixels[1] = cm[block[1]];
360 pixels[2] = cm[block[2]];
361 pixels[3] = cm[block[3]];
362 pixels[4] = cm[block[4]];
363 pixels[5] = cm[block[5]];
364 pixels[6] = cm[block[6]];
365 pixels[7] = cm[block[7]];
366
367 pixels += line_size;
368 block += 8;
de6d9b64
FB
369 }
370}
371
178fcca8 372static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 373 int line_size)
178fcca8
MN
374{
375 int i;
55fde95e 376 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 377
178fcca8
MN
378 /* read the pixels */
379 for(i=0;i<4;i++) {
380 pixels[0] = cm[block[0]];
381 pixels[1] = cm[block[1]];
382 pixels[2] = cm[block[2]];
383 pixels[3] = cm[block[3]];
384
385 pixels += line_size;
386 block += 8;
387 }
388}
389
9ca358b9 390static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 391 int line_size)
9ca358b9
MN
392{
393 int i;
55fde95e 394 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 395
9ca358b9
MN
396 /* read the pixels */
397 for(i=0;i<2;i++) {
398 pixels[0] = cm[block[0]];
399 pixels[1] = cm[block[1]];
400
401 pixels += line_size;
402 block += 8;
403 }
404}
405
484a337c
RB
406void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
407 uint8_t *restrict pixels,
408 int line_size)
f9ed9d85
MM
409{
410 int i, j;
411
412 for (i = 0; i < 8; i++) {
413 for (j = 0; j < 8; j++) {
414 if (*block < -128)
415 *pixels = 0;
416 else if (*block > 127)
417 *pixels = 255;
418 else
419 *pixels = (uint8_t)(*block + 128);
420 block++;
421 pixels++;
422 }
423 pixels += (line_size - 8);
424 }
425}
426
342c7dfd
KS
427static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
428 int line_size)
429{
430 int i;
431
432 /* read the pixels */
433 for(i=0;i<8;i++) {
434 pixels[0] = block[0];
435 pixels[1] = block[1];
436 pixels[2] = block[2];
437 pixels[3] = block[3];
438 pixels[4] = block[4];
439 pixels[5] = block[5];
440 pixels[6] = block[6];
441 pixels[7] = block[7];
442
443 pixels += line_size;
444 block += 8;
445 }
446}
447
484a337c
RB
448void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
449 int line_size)
de6d9b64 450{
de6d9b64 451 int i;
55fde95e 452 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 453
de6d9b64 454 /* read the pixels */
de6d9b64 455 for(i=0;i<8;i++) {
c13e1abd
FH
456 pixels[0] = cm[pixels[0] + block[0]];
457 pixels[1] = cm[pixels[1] + block[1]];
458 pixels[2] = cm[pixels[2] + block[2]];
459 pixels[3] = cm[pixels[3] + block[3]];
460 pixels[4] = cm[pixels[4] + block[4]];
461 pixels[5] = cm[pixels[5] + block[5]];
462 pixels[6] = cm[pixels[6] + block[6]];
463 pixels[7] = cm[pixels[7] + block[7]];
464 pixels += line_size;
465 block += 8;
de6d9b64
FB
466 }
467}
178fcca8
MN
468
469static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
470 int line_size)
471{
472 int i;
55fde95e 473 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 474
178fcca8
MN
475 /* read the pixels */
476 for(i=0;i<4;i++) {
477 pixels[0] = cm[pixels[0] + block[0]];
478 pixels[1] = cm[pixels[1] + block[1]];
479 pixels[2] = cm[pixels[2] + block[2]];
480 pixels[3] = cm[pixels[3] + block[3]];
481 pixels += line_size;
482 block += 8;
483 }
484}
9ca358b9
MN
485
486static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
487 int line_size)
488{
489 int i;
55fde95e 490 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 491
9ca358b9
MN
492 /* read the pixels */
493 for(i=0;i<2;i++) {
494 pixels[0] = cm[pixels[0] + block[0]];
495 pixels[1] = cm[pixels[1] + block[1]];
496 pixels += line_size;
497 block += 8;
498 }
499}
36940eca 500
1edbfe19
LM
501static int sum_abs_dctelem_c(DCTELEM *block)
502{
503 int sum=0, i;
504 for(i=0; i<64; i++)
505 sum+= FFABS(block[i]);
506 return sum;
507}
508
342c7dfd
KS
/**
 * Fill a 16-byte-wide block with a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written if h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 16);
        row += line_size;
    }
}
518
/**
 * Fill an 8-byte-wide block with a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written if h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 8);
        row += line_size;
    }
}
528
/**
 * Upscale an 8x8 block to 16x16 by duplicating every sample horizontally
 * and every row vertically.
 *
 * @param src      64 source samples, 8 per row (annotated "align 8")
 * @param dst      top-left destination byte (annotated "align 8"); it is
 *                 written through uint16_t pointers
 * @param linesize destination row stride in bytes
 *
 * Multiplying a byte by 0x0101 replicates it into both halves of a 16-bit
 * word, which is stored once in each of two adjacent destination rows.
 * The uint16_t row pointers advance by linesize elements, i.e. two
 * destination rows, per source row.
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    uint16_t *upper = (uint16_t *) dst;
    uint16_t *lower = (uint16_t *)(dst + linesize);
    int row, col;

    for (row = 0; row < 8; row++) {
        const uint8_t *in = src + 8 * row;

        for (col = 0; col < 8; col++) {
            const uint16_t pair = in[col] * 0x0101;

            lower[col] = pair;
            upper[col] = pair;
        }
        upper += linesize;
        lower += linesize;
    }
}
544
de6d9b64
FB
/* Rounded averages of two and four values.
 * Every argument is parenthesised so that an argument containing an operator
 * of lower precedence than '+' (e.g. `x | 1`, `c ? a : b`) is averaged as a
 * whole. Arguments are evaluated exactly once. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
547
/**
 * Bilinear interpolation of an 8-sample-wide block at a 1/16-sample offset.
 *
 * @param dst     destination block
 * @param src     source block; one extra column and one extra row are read
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbour (out of 16)
 * @param y16     vertical weight of the lower neighbour (out of 16)
 * @param rounder value added before the final >> 8
 *
 * The four weights always sum to 16 * 16 = 256, hence the shift by 8.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = x16        * (16 - y16);
    const int w_bl = (16 - x16) * y16;
    const int w_br = x16        * y16;
    int row, col;

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
570
/**
 * Motion compensation of an 8-sample-wide block along an affine trajectory.
 *
 * @param dst    destination block
 * @param src    source picture
 * @param stride row stride in bytes, shared by dst and src
 * @param h      number of rows to produce
 * @param ox,oy  source coordinates of the first sample of the first row
 * @param dxx    step added to the x coordinate per output column
 * @param dyx    step added to the y coordinate per output column
 * @param dxy    step added to ox per output row
 * @param dyy    step added to oy per output row
 * @param shift  number of fractional bits left after dropping the low 16 bits
 * @param r      value added before the final shift
 * @param width  source width; positions with x >= width - 1 are treated as
 *               outside horizontally
 * @param height source height; positions with y >= height - 1 are treated as
 *               outside vertically
 *
 * Coordinates are shifted right by 16; the low `shift` bits of the result
 * are the interpolation weights and the remaining bits the integer sample
 * position. Inside the picture a bilinear blend of four samples is used;
 * when one coordinate is outside it is clipped and only the other direction
 * is interpolated; when both are outside the clipped sample is copied.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int one       = 1 << shift;
    const int frac_mask = one - 1;
    const int last_x    = width  - 1;
    const int last_y    = height - 1;
    int row;

    for (row = 0; row < h; row++) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) { //XXX FIXME optimize
            int sx = vx >> 16;
            int sy = vy >> 16;
            const int fx = sx & frac_mask;
            const int fy = sy & frac_mask;
            int x_inside, y_inside, pos;

            sx >>= shift;
            sy >>= shift;

            /* The unsigned compare rejects negative positions as well. */
            x_inside = (unsigned)sx < last_x;
            y_inside = (unsigned)sy < last_y;

            if (x_inside && y_inside) {
                pos = sx + sy * stride;
                out[col] = (  (  src[pos             ] * (one - fx)
                               + src[pos + 1         ] * fx) * (one - fy)
                            + (  src[pos + stride    ] * (one - fx)
                               + src[pos + stride + 1] * fx) * fy
                            + r) >> (shift * 2);
            } else if (x_inside) {
                pos = sx + av_clip(sy, 0, last_y) * stride;
                out[col] = (  (  src[pos    ] * (one - fx)
                               + src[pos + 1] * fx) * one
                            + r) >> (shift * 2);
            } else if (y_inside) {
                pos = av_clip(sx, 0, last_x) + sy * stride;
                out[col] = (  (  src[pos         ] * (one - fy)
                               + src[pos + stride] * fy) * one
                            + r) >> (shift * 2);
            } else {
                pos = av_clip(sx, 0, last_x) + av_clip(sy, 0, last_y) * stride;
                out[col] = src[pos];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
628
/* Third-pel "put" at offset (0,0): dispatches to the fixed-width 8-bit
 * put_pixels routine for widths 2, 4, 8 and 16. Any other width is a no-op. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
637
/* Third-pel "put", horizontal offset 1/3: each output is
 * (2*cur + right + 1) / 3, computed as a multiply by 683 and >> 11
 * (683/2048 is approximately 1/3). */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + src[col+1] + 1;

            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
648
/* Third-pel "put", horizontal offset 2/3: each output is
 * (cur + 2*right + 1) / 3, computed as a multiply by 683 and >> 11
 * (683/2048 is approximately 1/3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2*src[col+1] + 1;

            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 659
669ac79c
MN
/* Third-pel "put", vertical offset 1/3: each output is
 * (2*cur + below + 1) / 3, computed as a multiply by 683 and >> 11
 * (683/2048 is approximately 1/3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + src[col+stride] + 1;

            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 670
669ac79c
MN
/* Third-pel "put", offset (1/3, 1/3): weights 4/3/3/2 on the current,
 * right, lower and lower-right samples. The weights sum to 12; the division
 * is done as a multiply by 2731 and >> 15 (2731/32768 is approximately 1/12). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = 4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6;

            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
681
/* Third-pel "put", offset (1/3, 2/3): weights 3/2/4/3 on the current,
 * right, lower and lower-right samples. The weights sum to 12; the division
 * is done as a multiply by 2731 and >> 15 (2731/32768 is approximately 1/12). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = 3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6;

            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
692
/* Third-pel "put", vertical offset 2/3: each output is
 * (cur + 2*below + 1) / 3, computed as a multiply by 683 and >> 11
 * (683/2048 is approximately 1/3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2*src[col+stride] + 1;

            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
703
/* Third-pel "put", offset (2/3, 1/3): weights 3/4/2/3 on the current,
 * right, lower and lower-right samples. The weights sum to 12; the division
 * is done as a multiply by 2731 and >> 15 (2731/32768 is approximately 1/12). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = 3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6;

            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
714
/* Third-pel "put", offset (2/3, 2/3): weights 2/3/3/4 on the current,
 * right, lower and lower-right samples. The weights sum to 12; the division
 * is done as a multiply by 2731 and >> 15 (2731/32768 is approximately 1/12). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6;

            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
da3b9756
MM
725
/* Third-pel "avg" at offset (0,0): dispatches to the fixed-width 8-bit
 * avg_pixels routine for widths 2, 4, 8 and 16. Any other width is a no-op. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
734
/* Third-pel "avg", horizontal offset 1/3: interpolates
 * (2*cur + right + 1) / 3 (multiply by 683, >> 11) and then stores the
 * rounded average of that value and the existing destination sample. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(2*src[col] + src[col+1] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
745
/* Third-pel "avg", horizontal offset 2/3: interpolates
 * (cur + 2*right + 1) / 3 (multiply by 683, >> 11) and then stores the
 * rounded average of that value and the existing destination sample. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(src[col] + 2*src[col+1] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 756
da3b9756
MM
/* Third-pel "avg", vertical offset 1/3: interpolates
 * (2*cur + below + 1) / 3 (multiply by 683, >> 11) and then stores the
 * rounded average of that value and the existing destination sample. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(2*src[col] + src[col+stride] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 767
da3b9756
MM
/* Third-pel "avg", offset (1/3, 1/3): interpolates with weights 4/3/3/2 on
 * the current, right, lower and lower-right samples (sum 12; multiply by
 * 2731, >> 15) and then stores the rounded average of that value and the
 * existing destination sample. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
778
/* Third-pel "avg", offset (1/3, 2/3): interpolates with weights 3/2/4/3 on
 * the current, right, lower and lower-right samples (sum 12; multiply by
 * 2731, >> 15) and then stores the rounded average of that value and the
 * existing destination sample. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
789
/* Third-pel "avg", vertical offset 2/3: interpolates
 * (cur + 2*below + 1) / 3 (multiply by 683, >> 11) and then stores the
 * rounded average of that value and the existing destination sample. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
800
/* Third-pel "avg", offset (2/3, 1/3): interpolates with weights 3/4/2/3 on
 * the current, right, lower and lower-right samples (sum 12; multiply by
 * 2731, >> 15) and then stores the rounded average of that value and the
 * existing destination sample. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
811
/* Third-pel "avg", offset (2/3, 2/3): interpolates with weights 2/3/3/4 on
 * the current, right, lower and lower-right samples (sum 12; multiply by
 * 2731, >> 15) and then stores the rounded average of that value and the
 * existing destination sample. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = height; row > 0; row--) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
669ac79c
MN
#if 0
/* Disabled. TPEL_WIDTH(width) would generate fixed-width wrappers
 * put_tpel_pixels<width>_mcXY_c() around the generic put_tpel_pixels_mcXY_c()
 * helpers. The wrapper bodies used to start with a stray `void`, which made
 * each of them a malformed declaration instead of a call; they are plain
 * calls now, so the macro expands to valid code if it is ever enabled. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
843
b3184779 844#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda 845static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 846 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
847 int i;\
848 for(i=0; i<h; i++)\
849 {\
850 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
851 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
852 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
853 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
854 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
855 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
856 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
857 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
858 dst+=dstStride;\
859 src+=srcStride;\
860 }\
44eb4951
MN
861}\
862\
0c1a9eda 863static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 864 const int w=8;\
55fde95e 865 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
866 int i;\
867 for(i=0; i<w; i++)\
868 {\
869 const int src0= src[0*srcStride];\
870 const int src1= src[1*srcStride];\
871 const int src2= src[2*srcStride];\
872 const int src3= src[3*srcStride];\
873 const int src4= src[4*srcStride];\
874 const int src5= src[5*srcStride];\
875 const int src6= src[6*srcStride];\
876 const int src7= src[7*srcStride];\
877 const int src8= src[8*srcStride];\
878 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
879 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
880 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
881 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
882 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
883 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
884 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
885 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
886 dst++;\
887 src++;\
888 }\
889}\
890\
0c1a9eda 891static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 892 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 893 int i;\
826f429a 894 \
b3184779
MN
895 for(i=0; i<h; i++)\
896 {\
897 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
898 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
899 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
900 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
901 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
902 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
903 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
904 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
905 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
906 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
907 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
908 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
909 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
910 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
911 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
912 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
913 dst+=dstStride;\
914 src+=srcStride;\
915 }\
916}\
917\
0c1a9eda 918static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
55fde95e 919 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 920 int i;\
826f429a 921 const int w=16;\
b3184779
MN
922 for(i=0; i<w; i++)\
923 {\
924 const int src0= src[0*srcStride];\
925 const int src1= src[1*srcStride];\
926 const int src2= src[2*srcStride];\
927 const int src3= src[3*srcStride];\
928 const int src4= src[4*srcStride];\
929 const int src5= src[5*srcStride];\
930 const int src6= src[6*srcStride];\
931 const int src7= src[7*srcStride];\
932 const int src8= src[8*srcStride];\
933 const int src9= src[9*srcStride];\
934 const int src10= src[10*srcStride];\
935 const int src11= src[11*srcStride];\
936 const int src12= src[12*srcStride];\
937 const int src13= src[13*srcStride];\
938 const int src14= src[14*srcStride];\
939 const int src15= src[15*srcStride];\
940 const int src16= src[16*srcStride];\
941 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
942 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
943 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
944 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
945 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
946 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
947 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
948 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
949 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
950 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
951 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
952 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
953 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
954 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
955 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
956 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
957 dst++;\
958 src++;\
959 }\
960}\
961\
0c1a9eda
ZK
962static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
963 uint8_t half[64];\
b3184779 964 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
19a0729b 965 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
966}\
967\
0c1a9eda 968static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 969 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
970}\
971\
0c1a9eda
ZK
972static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
973 uint8_t half[64];\
b3184779 974 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
19a0729b 975 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
976}\
977\
0c1a9eda
ZK
978static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
979 uint8_t full[16*9];\
980 uint8_t half[64];\
b3184779 981 copy_block9(full, src, 16, stride, 9);\
db794953 982 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
19a0729b 983 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
984}\
985\
0c1a9eda
ZK
986static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
987 uint8_t full[16*9];\
b3184779 988 copy_block9(full, src, 16, stride, 9);\
db794953 989 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
990}\
991\
0c1a9eda
ZK
992static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
993 uint8_t full[16*9];\
994 uint8_t half[64];\
b3184779 995 copy_block9(full, src, 16, stride, 9);\
db794953 996 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
19a0729b 997 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 998}\
0c1a9eda
ZK
999void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1000 uint8_t full[16*9];\
1001 uint8_t halfH[72];\
1002 uint8_t halfV[64];\
1003 uint8_t halfHV[64];\
b3184779
MN
1004 copy_block9(full, src, 16, stride, 9);\
1005 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1006 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1007 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1008 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1009}\
0c1a9eda
ZK
1010static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1011 uint8_t full[16*9];\
1012 uint8_t halfH[72];\
1013 uint8_t halfHV[64];\
db794953
MN
1014 copy_block9(full, src, 16, stride, 9);\
1015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1016 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953 1017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1018 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
db794953 1019}\
0c1a9eda
ZK
1020void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1021 uint8_t full[16*9];\
1022 uint8_t halfH[72];\
1023 uint8_t halfV[64];\
1024 uint8_t halfHV[64];\
b3184779
MN
1025 copy_block9(full, src, 16, stride, 9);\
1026 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1027 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1028 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1029 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1030}\
0c1a9eda
ZK
1031static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1032 uint8_t full[16*9];\
1033 uint8_t halfH[72];\
1034 uint8_t halfHV[64];\
db794953
MN
1035 copy_block9(full, src, 16, stride, 9);\
1036 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1037 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953 1038 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1039 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
db794953 1040}\
0c1a9eda
ZK
1041void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1042 uint8_t full[16*9];\
1043 uint8_t halfH[72];\
1044 uint8_t halfV[64];\
1045 uint8_t halfHV[64];\
b3184779
MN
1046 copy_block9(full, src, 16, stride, 9);\
1047 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1048 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1049 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1050 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1051}\
0c1a9eda
ZK
1052static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1053 uint8_t full[16*9];\
1054 uint8_t halfH[72];\
1055 uint8_t halfHV[64];\
db794953
MN
1056 copy_block9(full, src, 16, stride, 9);\
1057 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1058 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953 1059 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1060 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
db794953 1061}\
0c1a9eda
ZK
1062void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1063 uint8_t full[16*9];\
1064 uint8_t halfH[72];\
1065 uint8_t halfV[64];\
1066 uint8_t halfHV[64];\
b3184779
MN
1067 copy_block9(full, src, 16, stride, 9);\
1068 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
1069 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1070 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1071 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1072}\
0c1a9eda
ZK
1073static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1074 uint8_t full[16*9];\
1075 uint8_t halfH[72];\
1076 uint8_t halfHV[64];\
db794953
MN
1077 copy_block9(full, src, 16, stride, 9);\
1078 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1079 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953 1080 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1081 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
db794953 1082}\
0c1a9eda
ZK
1083static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1084 uint8_t halfH[72];\
1085 uint8_t halfHV[64];\
b3184779 1086 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1087 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1088 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 1089}\
0c1a9eda
ZK
1090static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1091 uint8_t halfH[72];\
1092 uint8_t halfHV[64];\
b3184779 1093 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1094 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1095 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 1096}\
0c1a9eda
ZK
1097void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1098 uint8_t full[16*9];\
1099 uint8_t halfH[72];\
1100 uint8_t halfV[64];\
1101 uint8_t halfHV[64];\
b3184779
MN
1102 copy_block9(full, src, 16, stride, 9);\
1103 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1104 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1105 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1106 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1107}\
0c1a9eda
ZK
1108static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1109 uint8_t full[16*9];\
1110 uint8_t halfH[72];\
db794953
MN
1111 copy_block9(full, src, 16, stride, 9);\
1112 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1113 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953
MN
1114 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1115}\
0c1a9eda
ZK
1116void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1117 uint8_t full[16*9];\
1118 uint8_t halfH[72];\
1119 uint8_t halfV[64];\
1120 uint8_t halfHV[64];\
b3184779
MN
1121 copy_block9(full, src, 16, stride, 9);\
1122 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1123 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1124 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1125 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1126}\
0c1a9eda
ZK
1127static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1128 uint8_t full[16*9];\
1129 uint8_t halfH[72];\
db794953
MN
1130 copy_block9(full, src, 16, stride, 9);\
1131 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1132 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953
MN
1133 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1134}\
0c1a9eda
ZK
1135static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1136 uint8_t halfH[72];\
b3184779 1137 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1138 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1139}\
b3184779 1140\
0c1a9eda
ZK
1141static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1142 uint8_t half[256];\
b3184779 1143 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
19a0729b 1144 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
b3184779
MN
1145}\
1146\
0c1a9eda 1147static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1148 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1149}\
b3184779 1150\
0c1a9eda
ZK
1151static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1152 uint8_t half[256];\
b3184779 1153 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
19a0729b 1154 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
b3184779
MN
1155}\
1156\
0c1a9eda
ZK
1157static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1158 uint8_t full[24*17];\
1159 uint8_t half[256];\
b3184779 1160 copy_block17(full, src, 24, stride, 17);\
826f429a 1161 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
19a0729b 1162 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
b3184779
MN
1163}\
1164\
0c1a9eda
ZK
1165static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1166 uint8_t full[24*17];\
b3184779 1167 copy_block17(full, src, 24, stride, 17);\
826f429a 1168 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1169}\
1170\
0c1a9eda
ZK
1171static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1172 uint8_t full[24*17];\
1173 uint8_t half[256];\
b3184779 1174 copy_block17(full, src, 24, stride, 17);\
826f429a 1175 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
19a0729b 1176 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
b3184779 1177}\
0c1a9eda
ZK
1178void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1179 uint8_t full[24*17];\
1180 uint8_t halfH[272];\
1181 uint8_t halfV[256];\
1182 uint8_t halfHV[256];\
b3184779
MN
1183 copy_block17(full, src, 24, stride, 17);\
1184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1185 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1186 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1187 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1188}\
0c1a9eda
ZK
1189static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1190 uint8_t full[24*17];\
1191 uint8_t halfH[272];\
1192 uint8_t halfHV[256];\
db794953
MN
1193 copy_block17(full, src, 24, stride, 17);\
1194 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1195 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953 1196 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1197 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
db794953 1198}\
0c1a9eda
ZK
1199void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1200 uint8_t full[24*17];\
1201 uint8_t halfH[272];\
1202 uint8_t halfV[256];\
1203 uint8_t halfHV[256];\
b3184779
MN
1204 copy_block17(full, src, 24, stride, 17);\
1205 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1206 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1207 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1208 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1209}\
0c1a9eda
ZK
1210static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1211 uint8_t full[24*17];\
1212 uint8_t halfH[272];\
1213 uint8_t halfHV[256];\
db794953
MN
1214 copy_block17(full, src, 24, stride, 17);\
1215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1216 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953 1217 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1218 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
db794953 1219}\
0c1a9eda
ZK
1220void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1221 uint8_t full[24*17];\
1222 uint8_t halfH[272];\
1223 uint8_t halfV[256];\
1224 uint8_t halfHV[256];\
b3184779
MN
1225 copy_block17(full, src, 24, stride, 17);\
1226 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1227 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1228 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1229 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1230}\
0c1a9eda
ZK
1231static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1232 uint8_t full[24*17];\
1233 uint8_t halfH[272];\
1234 uint8_t halfHV[256];\
db794953
MN
1235 copy_block17(full, src, 24, stride, 17);\
1236 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1237 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953 1238 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1239 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
db794953 1240}\
0c1a9eda
ZK
1241void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1242 uint8_t full[24*17];\
1243 uint8_t halfH[272];\
1244 uint8_t halfV[256];\
1245 uint8_t halfHV[256];\
b3184779
MN
1246 copy_block17(full, src, 24, stride, 17);\
1247 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1248 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1249 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1250 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1251}\
0c1a9eda
ZK
1252static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1253 uint8_t full[24*17];\
1254 uint8_t halfH[272];\
1255 uint8_t halfHV[256];\
db794953
MN
1256 copy_block17(full, src, 24, stride, 17);\
1257 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1258 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953 1259 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1260 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
db794953 1261}\
0c1a9eda
ZK
1262static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1263 uint8_t halfH[272];\
1264 uint8_t halfHV[256];\
b3184779 1265 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1266 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1267 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
b3184779 1268}\
0c1a9eda
ZK
1269static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1270 uint8_t halfH[272];\
1271 uint8_t halfHV[256];\
b3184779 1272 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1273 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1274 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
b3184779 1275}\
0c1a9eda
ZK
1276void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1277 uint8_t full[24*17];\
1278 uint8_t halfH[272];\
1279 uint8_t halfV[256];\
1280 uint8_t halfHV[256];\
b3184779
MN
1281 copy_block17(full, src, 24, stride, 17);\
1282 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1283 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1284 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1285 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
b3184779 1286}\
0c1a9eda
ZK
1287static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1288 uint8_t full[24*17];\
1289 uint8_t halfH[272];\
db794953
MN
1290 copy_block17(full, src, 24, stride, 17);\
1291 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1292 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953
MN
1293 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1294}\
0c1a9eda
ZK
1295void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1296 uint8_t full[24*17];\
1297 uint8_t halfH[272];\
1298 uint8_t halfV[256];\
1299 uint8_t halfHV[256];\
b3184779
MN
1300 copy_block17(full, src, 24, stride, 17);\
1301 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1302 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1303 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1304 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
b3184779 1305}\
0c1a9eda
ZK
1306static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1307 uint8_t full[24*17];\
1308 uint8_t halfH[272];\
db794953
MN
1309 copy_block17(full, src, 24, stride, 17);\
1310 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1311 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953
MN
1312 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1313}\
0c1a9eda
ZK
1314static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1315 uint8_t halfH[272];\
b3184779 1316 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1317 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1318}
44eb4951 1319
b3184779
MN
1320#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1321#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1322#define op_put(a, b) a = cm[((b) + 16)>>5]
1323#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1324
1325QPEL_MC(0, put_ , _ , op_put)
1326QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1327QPEL_MC(0, avg_ , _ , op_avg)
1328//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1329#undef op_avg
1330#undef op_avg_no_rnd
1331#undef op_put
1332#undef op_put_no_rnd
44eb4951 1333
3d1b1caa
MR
/*
 * QPEL_MC does not generate mc00 functions; alias those names to existing
 * whole-block put/avg routines instead.
 */
#define put_qpel8_mc00_c         ff_put_pixels8x8_c
#define avg_qpel8_mc00_c         ff_avg_pixels8x8_c
#define put_qpel16_mc00_c        ff_put_pixels16x16_c
#define avg_qpel16_mc00_c        ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
3d1b1caa 1340
1457ab52 1341static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
55fde95e 1342 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1343 int i;
1344
1345 for(i=0; i<h; i++){
1346 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1347 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1348 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1349 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1350 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1351 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1352 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1353 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1354 dst+=dstStride;
115329f1 1355 src+=srcStride;
1457ab52
MN
1356 }
1357}
1358
#if CONFIG_RV40_DECODER
/*
 * RV40 mc33 entries: each one simply forwards to the matching
 * {put,avg}_pixels{16,8}_xy2_8_c routine with the block height filled in.
 */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}

static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}

static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}

static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1373
1457ab52 1374static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
55fde95e 1375 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1376 int i;
1377
1378 for(i=0; i<w; i++){
1379 const int src_1= src[ -srcStride];
1380 const int src0 = src[0 ];
1381 const int src1 = src[ srcStride];
1382 const int src2 = src[2*srcStride];
1383 const int src3 = src[3*srcStride];
1384 const int src4 = src[4*srcStride];
1385 const int src5 = src[5*srcStride];
1386 const int src6 = src[6*srcStride];
1387 const int src7 = src[7*srcStride];
1388 const int src8 = src[8*srcStride];
1389 const int src9 = src[9*srcStride];
1390 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1391 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1392 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1393 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1394 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1395 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1396 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1397 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1398 src++;
1399 dst++;
1400 }
1401}
1402
1457ab52
MN
/**
 * mspel 8x8, position (1,0): run the horizontal lowpass into a temporary
 * 8x8 block, then combine it with the unshifted source via put_pixels8_l2_8.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64];   /* 8x8, stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, filtered, stride, stride, 8, 8);
}
1408
/**
 * mspel 8x8, position (2,0): the horizontal lowpass output is written
 * straight to dst.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1412
/**
 * mspel 8x8, position (3,0): run the horizontal lowpass into a temporary
 * 8x8 block, then combine it with the source shifted one pixel to the right
 * via put_pixels8_l2_8.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64];   /* 8x8, stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, filtered, stride, stride, 8, 8);
}
1418
/**
 * mspel 8x8, position (0,2): the vertical lowpass output is written
 * straight to dst.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1422
/**
 * mspel 8x8, position (1,2): combine the vertically filtered source with the
 * horizontally-then-vertically filtered source via put_pixels8_l2_8.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t vOnly[64];      /* vertical filter of src, 8x8           */
    uint8_t hRows[88];      /* horizontal filter, 11 rows of 8 bytes */
    uint8_t hThenV[64];     /* vertical filter of hRows, 8x8         */

    wmv2_mspel8_v_lowpass(vOnly, src, 8, stride, 8);

    /* Start one row above src: the vertical pass below reads row -1. */
    wmv2_mspel8_h_lowpass(hRows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hThenV, hRows + 8, 8, 8, 8);

    put_pixels8_l2_8(dst, vOnly, hThenV, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position (3,2): like the (1,2) case, except that the
 * vertical-only filter is taken from the source shifted one pixel right.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t vOnly[64];      /* vertical filter of src+1, 8x8         */
    uint8_t hRows[88];      /* horizontal filter, 11 rows of 8 bytes */
    uint8_t hThenV[64];     /* vertical filter of hRows, 8x8         */

    wmv2_mspel8_v_lowpass(vOnly, src + 1, 8, stride, 8);

    /* Start one row above src: the vertical pass below reads row -1. */
    wmv2_mspel8_h_lowpass(hRows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hThenV, hRows + 8, 8, 8, 8);

    put_pixels8_l2_8(dst, vOnly, hThenV, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position (2,2): horizontal lowpass into an 11-row temporary,
 * then vertical lowpass of that temporary straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hRows[88];      /* 11 rows of 8 bytes */

    /* Start one row above src: the vertical pass reads row -1. */
    wmv2_mspel8_h_lowpass(hRows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hRows + 8, stride, 8, 8);
}
1446
332f9ac4 1447static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1448 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1449 int x;
1450 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1451
332f9ac4
MN
1452 for(x=0; x<8; x++){
1453 int d1, d2, ad1;
1454 int p0= src[x-2*stride];
1455 int p1= src[x-1*stride];
1456 int p2= src[x+0*stride];
1457 int p3= src[x+1*stride];
1458 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1459
1460 if (d<-2*strength) d1= 0;
1461 else if(d<- strength) d1=-2*strength - d;
1462 else if(d< strength) d1= d;
1463 else if(d< 2*strength) d1= 2*strength - d;
1464 else d1= 0;
115329f1 1465
332f9ac4
MN
1466 p1 += d1;
1467 p2 -= d1;
1468 if(p1&256) p1= ~(p1>>31);
1469 if(p2&256) p2= ~(p2>>31);
115329f1 1470
332f9ac4
MN
1471 src[x-1*stride] = p1;
1472 src[x+0*stride] = p2;
1473
c26abfa5 1474 ad1= FFABS(d1)>>1;
115329f1 1475
f66e4f5f 1476 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1477
332f9ac4
MN
1478 src[x-2*stride] = p0 - d2;
1479 src[x+ stride] = p3 + d2;
1480 }
73f51a4d 1481 }
332f9ac4
MN
1482}
1483
1484static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1485 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1486 int y;
1487 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1488
332f9ac4
MN
1489 for(y=0; y<8; y++){
1490 int d1, d2, ad1;
1491 int p0= src[y*stride-2];
1492 int p1= src[y*stride-1];
1493 int p2= src[y*stride+0];
1494 int p3= src[y*stride+1];
1495 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1496
1497 if (d<-2*strength) d1= 0;
1498 else if(d<- strength) d1=-2*strength - d;
1499 else if(d< strength) d1= d;
1500 else if(d< 2*strength) d1= 2*strength - d;
1501 else d1= 0;
115329f1 1502
332f9ac4
MN
1503 p1 += d1;
1504 p2 -= d1;
1505 if(p1&256) p1= ~(p1>>31);
1506 if(p2&256) p2= ~(p2>>31);
115329f1 1507
332f9ac4
MN
1508 src[y*stride-1] = p1;
1509 src[y*stride+0] = p2;
1510
c26abfa5 1511 ad1= FFABS(d1)>>1;
115329f1 1512
f66e4f5f 1513 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1514
332f9ac4
MN
1515 src[y*stride-2] = p0 - d2;
1516 src[y*stride+1] = p3 + d2;
1517 }
73f51a4d 1518 }
332f9ac4 1519}
1457ab52 1520
fdbbf2e0
MN
/**
 * H.261 loop filter on one 8x8 block, applied in place.
 *
 * Separable (1, 2, 1) smoothing: a vertical pass into a temporary buffer,
 * then a horizontal pass back into src. The first and last row are not
 * filtered vertically and the first and last column are not filtered
 * horizontally; they are only rescaled.
 *
 * @param src    top-left pixel of the block
 * @param stride byte distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride)
{
    int temp[64];
    int x, y;

    /* Vertical pass. Border rows are scaled by 4 so that every entry of
     * temp carries the same gain as the filtered interior rows. */
    for (x = 0; x < 8; x++) {
        temp[x]      = 4 * src[x];
        temp[56 + x] = 4 * src[7 * stride + x];
    }
    for (y = 1; y < 7; y++) {
        const uint8_t *in = src + y * stride;
        int *out          = temp + 8 * y;

        for (x = 0; x < 8; x++)
            out[x] = in[x - stride] + 2 * in[x] + in[x + stride];
    }

    /* Horizontal pass. Border columns only undo the gain of 4, interior
     * columns are filtered and normalised by 16. */
    for (y = 0; y < 8; y++) {
        uint8_t *out  = src + y * stride;
        const int *in = temp + 8 * y;

        out[0] = (in[0] + 2) >> 2;
        out[7] = (in[7] + 2) >> 2;
        for (x = 1; x < 7; x++)
            out[x] = (in[x - 1] + 2 * in[x] + in[x + 1] + 8) >> 4;
    }
}
1547
/**
 * Sum of absolute differences between two 16 pixel wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return sum over all rows and 16 columns of |pix1 - pix2|
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1575
/**
 * Sum of absolute differences between a 16 pixel wide block and a reference
 * in which each pixel is replaced by avg2() of itself and its right
 * neighbour. Reads pix2[0] through pix2[16] on every row.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1603
/**
 * Sum of absolute differences between a 16 pixel wide block and a reference
 * in which each pixel is replaced by avg2() of itself and the pixel one row
 * below. Reads h + 1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1633
/**
 * Sum of absolute differences between a 16 pixel wide block and a reference
 * in which each pixel is replaced by avg4() of the 2x2 neighbourhood that
 * starts at it. Reads h + 1 rows of pix2, columns 0 through 16.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size byte distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1663
bb198e19 1664static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
1665{
1666 int s, i;
1667
1668 s = 0;
bb198e19 1669 for(i=0;i<h;i++) {
ba6802de
MN
1670 s += abs(pix1[0] - pix2[0]);
1671 s += abs(pix1[1] - pix2[1]);
1672 s += abs(pix1[2] - pix2[2]);
1673 s += abs(pix1[3] - pix2[3]);
1674 s += abs(pix1[4] - pix2[4]);
1675 s += abs(pix1[5] - pix2[5]);
1676 s += abs(pix1[6] - pix2[6]);
1677 s += abs(pix1[7] - pix2[7]);
1678 pix1 += line_size;
1679 pix2 += line_size;
1680 }
1681 return s;
1682}
1683
bb198e19 1684static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
1685{
1686 int s, i;
1687
1688 s = 0;
bb198e19 1689 for(i=0;i<h;i++) {
ba6802de
MN
1690 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1691 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1692 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1693 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1694 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1695 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1696 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1697 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1698 pix1 += line_size;
1699 pix2 += line_size;
1700 }
1701 return s;
1702}
1703
bb198e19 1704static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
1705{
1706 int s, i;
0c1a9eda 1707 uint8_t *pix3 = pix2 + line_size;
ba6802de
MN
1708
1709 s = 0;
bb198e19 1710 for(i=0;i<h;i++) {
ba6802de
MN
1711 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1712 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1713 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1714 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1715 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1716 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1717 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1718 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1719 pix1 += line_size;
1720 pix2 += line_size;
1721 pix3 += line_size;
1722 }
1723 return s;
1724}
1725
bb198e19 1726static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
1727{
1728 int s, i;
0c1a9eda 1729 uint8_t *pix3 = pix2 + line_size;
ba6802de
MN
1730
1731 s = 0;
bb198e19 1732 for(i=0;i<h;i++) {
ba6802de
MN
1733 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1734 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1735 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1736 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1737 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1738 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1739 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1740 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1741 pix1 += line_size;
1742 pix2 += line_size;
1743 pix3 += line_size;
1744 }
1745 return s;
1746}
1747
bf4e3bd2
MR
1748static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1749 MpegEncContext *c = v;
e6a2ac34
MN
1750 int score1=0;
1751 int score2=0;
1752 int x,y;
d4c5d2ad 1753
e6a2ac34
MN
1754 for(y=0; y<h; y++){
1755 for(x=0; x<16; x++){
1756 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1757 }
1758 if(y+1<h){
1759 for(x=0; x<15; x++){
c26abfa5 1760 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1761 - s1[x+1] + s1[x+1+stride])
c26abfa5 1762 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1763 - s2[x+1] + s2[x+1+stride]);
1764 }
1765 }
1766 s1+= stride;
1767 s2+= stride;
1768 }
d4c5d2ad 1769
c26abfa5
DB
1770 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1771 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1772}
1773
bf4e3bd2
MR
1774static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1775 MpegEncContext *c = v;
e6a2ac34
MN
1776 int score1=0;
1777 int score2=0;
1778 int x,y;
115329f1 1779
e6a2ac34
MN
1780 for(y=0; y<h; y++){
1781 for(x=0; x<8; x++){
1782 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1783 }
1784 if(y+1<h){
1785 for(x=0; x<7; x++){
c26abfa5 1786 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1787 - s1[x+1] + s1[x+1+stride])
c26abfa5 1788 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1789 - s2[x+1] + s2[x+1+stride]);
1790 }
1791 }
1792 s1+= stride;
1793 s2+= stride;
1794 }
115329f1 1795
c26abfa5
DB
1796 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1797 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1798}
1799
364a1797
MN
1800static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1801 int i;
1802 unsigned int sum=0;
1803
1804 for(i=0; i<8*8; i++){
1805 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1806 int w= weight[i];
1807 b>>= RECON_SHIFT;
1808 assert(-512<b && b<512);
1809
1810 sum += (w*b)*(w*b)>>4;
1811 }
1812 return sum>>2;
1813}
1814
1815static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1816 int i;
1817
1818 for(i=0; i<8*8; i++){
1819 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115329f1 1820 }
364a1797
MN
1821}
1822
a9badb51
MN
1823/**
1824 * permutes an 8x8 block.
2a5700de 1825 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
1826 * @param permutation the permutation vector
1827 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
115329f1 1828 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2a5700de 1829 * (inverse) permutated to scantable order!
a9badb51 1830 */
0c1a9eda 1831void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 1832{
7801d21d 1833 int i;
477ab036 1834 DCTELEM temp[64];
115329f1 1835
7801d21d 1836 if(last<=0) return;
90b5b51e 1837 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
d962f6fd 1838
7801d21d
MN
1839 for(i=0; i<=last; i++){
1840 const int j= scantable[i];
1841 temp[j]= block[j];
1842 block[j]=0;
1843 }
115329f1 1844
7801d21d
MN
1845 for(i=0; i<=last; i++){
1846 const int j= scantable[i];
1847 const int perm_j= permutation[j];
1848 block[perm_j]= temp[j];
1849 }
d962f6fd 1850}
e0eac44e 1851
622348f9
MN
/**
 * Comparison function that ignores all of its arguments.
 *
 * @return always 0
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
1855
1856void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1857 int i;
115329f1 1858
3899eb2f 1859 memset(cmp, 0, sizeof(void*)*6);
115329f1 1860
3899eb2f 1861 for(i=0; i<6; i++){
622348f9
MN
1862 switch(type&0xFF){
1863 case FF_CMP_SAD:
1864 cmp[i]= c->sad[i];
1865 break;
1866 case FF_CMP_SATD:
1867 cmp[i]= c->hadamard8_diff[i];
1868 break;
1869 case FF_CMP_SSE:
1870 cmp[i]= c->sse[i];
1871 break;
1872 case FF_CMP_DCT:
1873 cmp[i]= c->dct_sad[i];
1874 break;
27c61ac5
MN
1875 case FF_CMP_DCT264:
1876 cmp[i]= c->dct264_sad[i];
1877 break;
0fd6aea1
MN
1878 case FF_CMP_DCTMAX:
1879 cmp[i]= c->dct_max[i];
1880 break;
622348f9
MN
1881 case FF_CMP_PSNR:
1882 cmp[i]= c->quant_psnr[i];
1883 break;
1884 case FF_CMP_BIT:
1885 cmp[i]= c->bit[i];
1886 break;
1887 case FF_CMP_RD:
1888 cmp[i]= c->rd[i];
1889 break;
1890 case FF_CMP_VSAD:
1891 cmp[i]= c->vsad[i];
1892 break;
1893 case FF_CMP_VSSE:
1894 cmp[i]= c->vsse[i];
1895 break;
1896 case FF_CMP_ZERO:
1897 cmp[i]= zero_cmp;
1898 break;
e6a2ac34
MN
1899 case FF_CMP_NSSE:
1900 cmp[i]= c->nsse[i];
1901 break;
05aec7bb 1902#if CONFIG_DWT
26efc54e
MN
1903 case FF_CMP_W53:
1904 cmp[i]= c->w53[i];
1905 break;
1906 case FF_CMP_W97:
1907 cmp[i]= c->w97[i];
1908 break;
3a6fc8fa 1909#endif
622348f9
MN
1910 default:
1911 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1912 }
1913 }
1914}
1915
11f18faf 1916static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
469bd7b1
LM
1917 long i;
1918 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1919 long a = *(long*)(src+i);
1920 long b = *(long*)(dst+i);
1921 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
11f18faf
MN
1922 }
1923 for(; i<w; i++)
1924 dst[i+0] += src[i+0];
1925}
1926
4a9ca0a2 1927static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
469bd7b1 1928 long i;
4a9ca0a2
LM
1929 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1930 long a = *(long*)(src1+i);
1931 long b = *(long*)(src2+i);
469bd7b1 1932 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
4a9ca0a2
LM
1933 }
1934 for(; i<w; i++)
1935 dst[i] = src1[i]+src2[i];
1936}
1937
11f18faf 1938static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
469bd7b1 1939 long i;
b250f9c6 1940#if !HAVE_FAST_UNALIGNED
469bd7b1 1941 if((long)src2 & (sizeof(long)-1)){
31304587
LM
1942 for(i=0; i+7<w; i+=8){
1943 dst[i+0] = src1[i+0]-src2[i+0];
1944 dst[i+1] = src1[i+1]-src2[i+1];
1945 dst[i+2] = src1[i+2]-src2[i+2];
1946 dst[i+3] = src1[i+3]-src2[i+3];
1947 dst[i+4] = src1[i+4]-src2[i+4];
1948 dst[i+5] = src1[i+5]-src2[i+5];
1949 dst[i+6] = src1[i+6]-src2[i+6];
1950 dst[i+7] = src1[i+7]-src2[i+7];
1951 }
469bd7b1
LM
1952 }else
1953#endif
1954 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1955 long a = *(long*)(src1+i);
1956 long b = *(long*)(src2+i);
1957 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1958 }
11f18faf
MN
1959 for(; i<w; i++)
1960 dst[i+0] = src1[i+0]-src2[i+0];
1961}
1962
/**
 * Reconstruct a row from residuals using a median predictor.
 *
 * For every position the predictor is mid_pred() of the running left value,
 * src1[i] and (left + src1[i] - left_top) & 0xFF; diff[i] is added to it,
 * the result is truncated to 8 bits, stored in dst[i] and becomes the new
 * left value, while src1[i] becomes the new left_top value.
 *
 * @param dst      output row, w bytes
 * @param src1     reference row, w bytes
 * @param diff     residuals, w bytes
 * @param w        number of samples
 * @param left     in: initial left value; out: last reconstructed sample
 * @param left_top in: initial left_top value; out: last src1 sample
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur  = *left;
    uint8_t diag = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t ref = src1[x];

        cur    = mid_pred(cur, ref, (cur + ref - diag)&0xFF) + diff[x];
        diag   = ref;
        dst[x] = cur;
    }

    *left     = cur;
    *left_top = diag;
}
1979
/**
 * Compute the residuals of a row against a median predictor.
 *
 * For every position the predictor is mid_pred() of the running left value,
 * src1[i] and (left + src1[i] - left_top) & 0xFF. dst[i] receives
 * src2[i] minus that predictor; src2[i] becomes the new left value and
 * src1[i] the new left_top value.
 *
 * @param dst      residual output, w bytes
 * @param src1     reference row, w bytes
 * @param src2     row being coded, w bytes
 * @param w        number of samples
 * @param left     in: initial left value; out: last src2 sample
 * @param left_top in: initial left_top value; out: last src1 sample
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur  = *left;
    uint8_t diag = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t ref  = src1[x];
        const int     pred = mid_pred(cur, ref, (cur + ref - diag)&0xFF);

        diag   = ref;
        cur    = src2[x];
        dst[x] = cur - pred;
    }

    *left     = cur;
    *left_top = diag;
}
1997
/**
 * Running sum over a row: dst[i] receives the low 8 bits of acc after
 * src[0..i] have been added to it.
 *
 * @param dst output row, w bytes
 * @param src input row, w bytes
 * @param w   number of samples
 * @param acc initial accumulator value
 * @return the accumulator after the last sample (not reduced to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int x;

    for (x = 0; x < w; x++) {
        acc   += src[x];
        dst[x] = acc;
    }

    return acc;
}
2016
/* Byte offsets of the four components inside one 4-byte pixel. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-component running sum over a row of 4-byte pixels.
 *
 * Each of the four components keeps its own accumulator; every output
 * byte receives the low 8 bits of the matching accumulator after the
 * corresponding input byte has been added.
 *
 * @param dst   output row, 4*w bytes
 * @param src   input row, 4*w bytes
 * @param w     number of pixels
 * @param red   in: initial accumulator; out: accumulator after the last pixel
 * @param green likewise for the G component
 * @param blue  likewise for the B component
 * @param alpha likewise for the A component
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int acc_r = *red;
    int acc_g = *green;
    int acc_b = *blue;
    int acc_a = *alpha;
    int px;

    for (px = 0; px < w; px++) {
        const uint8_t *in  = src + 4*px;
        uint8_t       *out = dst + 4*px;

        acc_b += in[B];
        acc_g += in[G];
        acc_r += in[R];
        acc_a += in[A];

        out[B] = acc_b;
        out[G] = acc_g;
        out[R] = acc_r;
        out[A] = acc_a;
    }

    *red   = acc_r;
    *green = acc_g;
    *blue  = acc_b;
    *alpha = acc_a;
}
#undef B
#undef G
#undef R
#undef A
73c6f598 2057
1457ab52
MN
/* BUTTERFLY2: write the sum and the difference of two inputs to two
 * separate outputs. Expands to two statements, each already terminated. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* BUTTERFLY1: replace x by x+y and y by x-y, in place. */
#define BUTTERFLY1(x,y) \
{\
    const int bf_lhs= x;\
    const int bf_rhs= y;\
    x= bf_lhs+bf_rhs;\
    y= bf_lhs-bf_rhs;\
}

/* BUTTERFLYA: |x+y| + |x-y|, without storing the butterfly outputs. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1457ab52 2072
/**
 * 8x8 comparison: butterfly-transform the pixel difference src - dst and
 * sum the absolute values of the result.
 *
 * Each row of the difference goes through three butterfly stages; the
 * columns then go through two stages, and the third column stage is folded
 * into BUTTERFLYA, which adds up the absolute outputs directly.
 *
 * @param s      unused
 * @param dst    first block
 * @param src    second block
 * @param stride byte distance between consecutive rows of both blocks
 * @param h      must be 8 (asserted)
 * @return sum of absolute transformed differences
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* row pass */
    for (i = 0; i < 8; i++) {
        int           *row = temp + 8*i;
        const uint8_t *ps  = src + stride*i;
        const uint8_t *pd  = dst + stride*i;

        BUTTERFLY2(row[0], row[1], ps[0]-pd[0], ps[1]-pd[1]);
        BUTTERFLY2(row[2], row[3], ps[2]-pd[2], ps[3]-pd[3]);
        BUTTERFLY2(row[4], row[5], ps[4]-pd[4], ps[5]-pd[5]);
        BUTTERFLY2(row[6], row[7], ps[6]-pd[6], ps[7]-pd[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column pass, accumulating the absolute values of the last stage */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }
    return sum;
}
2117
/**
 * 8x8 intra score: butterfly-transform the pixels of src themselves and
 * sum the absolute values of the result, leaving out the mean term.
 *
 * Each row goes through three butterfly stages; the columns then go
 * through two stages, and the third column stage is folded into
 * BUTTERFLYA. Finally |temp[0] + temp[32]| is subtracted from the total.
 *
 * @param s      unused
 * @param src    block to score
 * @param dummy  unused
 * @param stride byte distance between consecutive rows
 * @param h      must be 8 (asserted)
 * @return sum of absolute transformed values without the mean term
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* row pass */
    for (i = 0; i < 8; i++) {
        int           *row = temp + 8*i;
        const uint8_t *ps  = src + stride*i;

        BUTTERFLY2(row[0], row[1], ps[0], ps[1]);
        BUTTERFLY2(row[2], row[3], ps[2], ps[3]);
        BUTTERFLY2(row[4], row[5], ps[4], ps[5]);
        BUTTERFLY2(row[6], row[7], ps[6], ps[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column pass, accumulating the absolute values of the last stage */
    for (i = 0; i < 8; i++) {
        int *col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
2165
bb198e19 2166static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2167 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2168 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
115329f1 2169
bb198e19 2170 assert(h==8);
1457ab52
MN
2171
2172 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 2173 s->dsp.fdct(temp);
1edbfe19 2174 return s->dsp.sum_abs_dctelem(temp);
1457ab52
MN
2175}
2176
#if CONFIG_GPL
/* One 8-point integer transform. The caller supplies SRC(x) to read input
 * sample x and DST(x,v) to consume output value v for position x. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * 8x8 comparison using the DCT8_1D integer transform.
 *
 * The pixel difference is written to an 8x8 scratch block by
 * dsp.diff_pixels. DCT8_1D is applied in place along the second index of
 * every dct[i][], then along the first index, where the outputs are not
 * stored but their absolute values are accumulated.
 *
 * @param c      MpegEncContext supplying dsp.diff_pixels
 * @param src1   first block
 * @param src2   second block
 * @param stride byte distance between consecutive rows of both blocks
 * @param h      unused
 * @return sum of absolute transformed differences
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s = c;
    DCTELEM dct[8][8];
    int total = 0;
    int i;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* first pass: results go back into the block */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for (i = 0; i < 8; i++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    /* second pass: results are only accumulated */
#define SRC(x) dct[x][i]
#define DST(x,v) total += FFABS(v)
    for (i = 0; i < 8; i++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    return total;
}
#endif
2229
0fd6aea1
MN
2230static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2231 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2232 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
0fd6aea1 2233 int sum=0, i;
115329f1 2234
0fd6aea1
MN
2235 assert(h==8);
2236
2237 s->dsp.diff_pixels(temp, src1, src2, stride);
2238 s->dsp.fdct(temp);
2239
2240 for(i=0; i<64; i++)
c26abfa5 2241 sum= FFMAX(sum, FFABS(temp[i]));
115329f1 2242
0fd6aea1
MN
2243 return sum;
2244}
2245
bb198e19 2246static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2247 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2248 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2480c390 2249 DCTELEM * const bak = temp+64;
1457ab52
MN
2250 int sum=0, i;
2251
bb198e19 2252 assert(h==8);
1457ab52 2253 s->mb_intra=0;
115329f1 2254
1457ab52 2255 s->dsp.diff_pixels(temp, src1, src2, stride);
115329f1 2256
1457ab52 2257 memcpy(bak, temp, 64*sizeof(DCTELEM));
115329f1 2258
67725183 2259 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
d50635cd 2260 s->dct_unquantize_inter(s, temp, 0, s->qscale);
59e6f60a 2261 ff_simple_idct(temp); //FIXME
115329f1 2262
1457ab52
MN
2263 for(i=0; i<64; i++)
2264 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
115329f1 2265
1457ab52
MN
2266 return sum;
2267}
2268
bb198e19 2269static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2270 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2271 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227
MR
2272 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2273 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2274 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
e6dba5df 2275 int i, last, run, bits, level, distortion, start_i;
3a87ac94
MN
2276 const int esc_length= s->ac_esc_length;
2277 uint8_t * length;
2278 uint8_t * last_length;
115329f1 2279
bb198e19
MN
2280 assert(h==8);
2281
90d43b52
MR
2282 copy_block8(lsrc1, src1, 8, stride, 8);
2283 copy_block8(lsrc2, src2, 8, stride, 8);
3a87ac94 2284
90d43b52 2285 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
67725183
MN
2286
2287 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2288
2289 bits=0;
115329f1 2290
3a87ac94 2291 if (s->mb_intra) {
115329f1 2292 start_i = 1;
3a87ac94
MN
2293 length = s->intra_ac_vlc_length;
2294 last_length= s->intra_ac_vlc_last_length;
67725183 2295 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2296 } else {
2297 start_i = 0;
2298 length = s->inter_ac_vlc_length;
2299 last_length= s->inter_ac_vlc_last_length;
2300 }
115329f1 2301
67725183 2302 if(last>=start_i){
3a87ac94
MN
2303 run=0;
2304 for(i=start_i; i<last; i++){
2305 int j= scantable[i];
2306 level= temp[j];
115329f1 2307
3a87ac94
MN
2308 if(level){
2309 level+=64;
2310 if((level&(~127)) == 0){
2311 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2312 }else
2313 bits+= esc_length;
2314 run=0;
2315 }else
2316 run++;
2317 }
2318 i= scantable[last];
115329f1 2319
3a87ac94 2320 level= temp[i] + 64;
1d0eab1d
MN
2321
2322 assert(level - 64);
115329f1 2323
3a87ac94
MN
2324 if((level&(~127)) == 0){
2325 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2326 }else
2327 bits+= esc_length;
115329f1 2328
67725183
MN
2329 }
2330
2331 if(last>=0){
d50635cd
MN
2332 if(s->mb_intra)
2333 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2334 else
2335 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3a87ac94 2336 }
115329f1 2337
90d43b52 2338 s->dsp.idct_add(lsrc2, 8, temp);
115329f1 2339
90d43b52 2340 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
3a87ac94 2341
e6dba5df 2342 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
2343}
2344
bb198e19 2345static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2346 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2347 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227 2348 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3a87ac94
MN
2349 int i, last, run, bits, level, start_i;
2350 const int esc_length= s->ac_esc_length;
2351 uint8_t * length;
2352 uint8_t * last_length;
bb198e19
MN
2353
2354 assert(h==8);
115329f1 2355
67725183 2356 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 2357
67725183
MN
2358 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2359
2360 bits=0;
115329f1 2361
3a87ac94 2362 if (s->mb_intra) {
115329f1 2363 start_i = 1;
3a87ac94
MN
2364 length = s->intra_ac_vlc_length;
2365 last_length= s->intra_ac_vlc_last_length;
67725183 2366 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2367 } else {
2368 start_i = 0;
2369 length = s->inter_ac_vlc_length;
2370 last_length= s->inter_ac_vlc_last_length;
2371 }
115329f1 2372
67725183 2373 if(last>=start_i){
3a87ac94
MN
2374 run=0;
2375 for(i=start_i; i<last; i++){
2376 int j= scantable[i];
2377 level= temp[j];
115329f1 2378
3a87ac94
MN
2379 if(level){
2380 level+=64;
2381 if((level&(~127)) == 0){
2382 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2383 }else
2384 bits+= esc_length;
2385 run=0;
2386 }else
2387 run++;
2388 }
2389 i= scantable[last];
115329f1 2390
67725183 2391 level= temp[i] + 64;
115329f1 2392
67725183 2393 assert(level - 64);
115329f1 2394
3a87ac94
MN
2395 if((level&(~127)) == 0){
2396 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2397 }else
2398 bits+= esc_length;
2399 }
2400
2401 return bits;
2402}
2403
7fb7f636
RS
/* Generates vsad_intra<size>_c(): the sum, over rows 1..h-1 and the first
 * <size> columns, of the absolute difference between each pixel and the
 * pixel directly below it. The c and dummy arguments are unused. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                          \
    int row, col;                                           \
                                                            \
    for (row = 1; row < h; row++) {                         \
        for (col = 0; col < size; col++)                    \
            total += FFABS(s[col] - s[col + stride]);       \
        s += stride;                                        \
    }                                                       \
                                                            \
    return total;                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
622348f9
MN
2421
/**
 * Vertical SAD of the difference between two 16-pixel-wide blocks.
 *
 * For rows 1..h-1 and 16 columns, accumulates the absolute value of
 * (s1 - s2) on the current row minus (s1 - s2) on the row below.
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride byte distance between consecutive rows of both blocks
 * @param h      number of rows
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += FFABS(d);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
2436
/* Square of an expression; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))
/* Generates vsse_intra<size>_c(): the sum, over rows 1..h-1 and the first
 * <size> columns, of the squared difference between each pixel and the
 * pixel directly below it. The c and dummy arguments are unused. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                          \
    int row, col;                                           \
                                                            \
    for (row = 1; row < h; row++) {                         \
        for (col = 0; col < size; col++)                    \
            total += SQ(s[col] - s[col + stride]);          \
        s += stride;                                        \
    }                                                       \
                                                            \
    return total;                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
622348f9
MN
2455
/**
 * Vertical SSE of the difference between two 16-pixel-wide blocks.
 *
 * For rows 1..h-1 and 16 columns, accumulates the square of
 * (s1 - s2) on the current row minus (s1 - s2) on the row below.
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride byte distance between consecutive rows of both blocks
 * @param h      number of rows
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        const uint8_t *n1 = s1 + stride;
        const uint8_t *n2 = s2 + stride;

        for (col = 0; col < 16; col++)
            total += SQ(s1[col] - s2[col] - n1[col] + n2[col]);

        s1 += stride;
        s2 += stride;
    }

    return total;
}
2470
a00177a9
MR
/**
 * Sum of squared differences between an int8_t and an int16_t vector.
 *
 * @param pix1 first vector, size elements
 * @param pix2 second vector, size elements
 * @param size number of elements
 * @return sum of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int total = 0;
    int k;

    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        total += d * d;
    }
    return total;
}
2479
9fbd14ac
DB
/* Instantiations of WRAPPER8_16_SQ for the 8x8 comparison functions above.
 * The macro is defined outside this section; presumably each line creates
 * the second-named 16-wide function on top of the first-named 8x8 one --
 * TODO confirm against the macro definition. The dct264 variant exists
 * only when CONFIG_GPL is set, matching its 8x8 function. */
2480WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2481WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2482WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
b250f9c6 2483#if CONFIG_GPL
9fbd14ac 2484WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
60900991 2485#endif
9fbd14ac
DB
2486WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2487WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2488WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2489WRAPPER8_16_SQ(bit8x8_c, bit16_c)
1457ab52 2490
/**
 * Element-wise product of two float vectors.
 *
 * @param dst  output, len elements
 * @param src0 first factor, len elements
 * @param src1 second factor, len elements
 * @param len  number of elements
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k];
        k++;
    }
}
2496
/**
 * Element-wise product of a float vector with a second vector read
 * backwards: dst[i] = src0[i] * src1[len-1-i].
 *
 * @param dst  output, len elements
 * @param src0 first factor, len elements
 * @param src1 second factor, len elements, traversed from its end
 * @param len  number of elements
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int fwd, bwd;

    for (fwd = 0, bwd = len - 1; fwd < len; fwd++, bwd--)
        dst[fwd] = src0[fwd] * src1[bwd];
}
2503
/**
 * Element-wise multiply-add: dst[i] = src0[i] * src1[i] + src2[i].
 *
 * @param dst  output, len elements
 * @param src0 first factor, len elements
 * @param src1 second factor, len elements
 * @param src2 addend, len elements
 * @param len  number of elements
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
2509
80ba1ddb
JR
/**
 * Windowed combination of two float vectors into an output of 2*len
 * elements.
 *
 * For k in 0..len-1, with m = 2*len-1-k:
 *   dst[k] = src0[k]*win[m] - src1[len-1-k]*win[k]
 *   dst[m] = src0[k]*win[k] + src1[len-1-k]*win[m]
 * All four inputs of a pair are read before either output is written.
 *
 * @param dst  output, 2*len elements
 * @param src0 first input, len elements
 * @param src1 second input, len elements, traversed from its end
 * @param win  window, 2*len elements
 * @param len  number of elements in each input
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int lo, hi;

    for (lo = 0, hi = 2*len - 1; lo < len; lo++, hi--) {
        const float s0   = src0[lo];
        const float s1   = src1[len - 1 - lo];
        const float w_lo = win[lo];
        const float w_hi = win[hi];

        dst[lo] = s0*w_hi - s1*w_lo;
        dst[hi] = s0*w_lo + s1*w_hi;
    }
}
2526
53b57211
MR
/**
 * Multiply every element of a float vector by a scalar.
 *
 * @param dst output, len elements
 * @param src input, len elements
 * @param mul scalar factor
 * @param len number of elements
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
2534
/**
 * Multiply a float vector by a list of 2-element sub-vectors and a scalar.
 *
 * Output elements 2n and 2n+1 are src[2n] and src[2n+1] multiplied by the
 * two elements of sv[n], then by mul. Elements are always processed in
 * pairs.
 *
 * @param dst output
 * @param src input
 * @param sv  array of pointers to 2-element vectors, one per output pair
 * @param mul scalar factor
 * @param len number of elements
 */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i = 0;

    while (i < len) {
        const float *vec = *sv++;

        dst[i    ] = src[i    ] * vec[0] * mul;
        dst[i + 1] = src[i + 1] * vec[1] * mul;
        i += 2;
    }
}
2544
/**
 * Multiply a float vector by a list of 4-element sub-vectors and a scalar.
 *
 * Output elements 4n..4n+3 are the matching src elements multiplied by the
 * four elements of sv[n], then by mul. Elements are always processed in
 * groups of four.
 *
 * @param dst output
 * @param src input
 * @param sv  array of pointers to 4-element vectors, one per output group
 * @param mul scalar factor
 * @param len number of elements
 */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i = 0;

    while (i < len) {
        const float *vec = *sv++;

        dst[i    ] = src[i    ] * vec[0] * mul;
        dst[i + 1] = src[i + 1] * vec[1] * mul;
        dst[i + 2] = src[i + 2] * vec[2] * mul;
        dst[i + 3] = src[i + 3] * vec[3] * mul;
        i += 4;
    }
}
2556
/**
 * Expand a list of 2-element sub-vectors into a flat vector, scaling
 * every element by mul. Elements are always written in pairs.
 *
 * @param dst output
 * @param sv  array of pointers to 2-element vectors, one per output pair
 * @param mul scalar factor
 * @param len number of output elements
 */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i = 0;

    while (i < len) {
        const float *vec = *sv++;

        dst[i    ] = vec[0] * mul;
        dst[i + 1] = vec[1] * mul;
        i += 2;
    }
}
2566
/**
 * Expand a list of 4-element sub-vectors into a flat vector, scaling
 * every element by mul. Elements are always written in groups of four.
 *
 * @param dst output
 * @param sv  array of pointers to 4-element vectors, one per output group
 * @param mul scalar factor
 * @param len number of output elements
 */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i = 0;

    while (i < len) {
        const float *vec = *sv++;

        dst[i    ] = vec[0] * mul;
        dst[i + 1] = vec[1] * mul;
        dst[i + 2] = vec[2] * mul;
        dst[i + 3] = vec[3] * mul;
        i += 4;
    }
}
2578
/**
 * In-place butterfly of two float vectors: v1[i] becomes v1[i] + v2[i]
 * and v2[i] becomes the original v1[i] - v2[i].
 *
 * @param v1  first vector, receives the sums
 * @param v2  second vector, receives the differences
 * @param len number of elements
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = v1[k];
        const float b = v2[k];

        v1[k] = a + b;
        v2[k] = a - b;
    }
}
2589
/**
 * Dot product of two float vectors, accumulated in a float from the first
 * element to the last.
 *
 * @param v1  first vector
 * @param v2  second vector
 * @param len number of elements
 * @return sum of v1[i] * v2[i], 0 when len <= 0
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}
2600
0a68cd87
VS
/**
 * Clip one value using unsigned integer comparisons only.
 *
 * Returns mini when a > mini; otherwise returns maxi when a with its top
 * bit flipped exceeds maxisign; otherwise returns a unchanged.
 *
 * @param a        value to clip
 * @param mini     lower bound, returned when a compares above it
 * @param maxi     upper bound
 * @param maxisign maxi with its top bit flipped, precomputed by the caller
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    const uint32_t top_bit = 1U<<31;

    if (a > mini)
        return mini;
    return ((a ^ top_bit) > maxisign) ? maxi : a;
}
2609
/**
 * Clip a float vector by comparing the raw 32-bit patterns of the values.
 *
 * The bounds and the data are reinterpreted as uint32_t and every element
 * is passed through clipf_c_one(). The only caller in this section uses it
 * when min < 0 and max > 0. Elements are processed in groups of 8.
 *
 * @param dst output
 * @param src input
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    const uint32_t  lo_bits  = *(uint32_t*)min;
    const uint32_t  hi_bits  = *(uint32_t*)max;
    const uint32_t  hi_flip  = hi_bits ^ (1U<<31);
    uint32_t       *out_bits = (uint32_t*)dst;
    const uint32_t *in_bits  = (const uint32_t*)src;
    int base, k;

    for (base = 0; base < len; base += 8) {
        for (k = 0; k < 8; k++)
            out_bits[base + k] = clipf_c_one(in_bits[base + k], lo_bits, hi_bits, hi_flip);
    }
}
/**
 * Clip every element of a float vector to [min, max].
 *
 * When min is negative and max positive the work is handed to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied to each
 * element. Elements are processed in groups of 8.
 *
 * @param dst output
 * @param src input
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8) {
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
    }
}
2645
/**
 * Dot product of two int16_t vectors where every product is shifted right
 * by shift before being accumulated.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements
 * @param shift right shift applied to each product
 * @return accumulated sum
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;
    int k   = 0;

    while (order--) {
        acc += (v1[k] * v2[k]) >> shift;
        k++;
    }

    return acc;
}
2655
/**
 * Dot product of v1 and v2 combined with an in-place update of v1.
 *
 * For every element the product v1[i] * v2[i] is accumulated using the
 * value of v1[i] before the update, then mul * v3[i] is added to v1[i].
 *
 * @param v1    first vector, updated in place
 * @param v2    second vector
 * @param v3    vector scaled by mul and added to v1
 * @param order number of elements
 * @param mul   multiplier applied to v3
 * @return dot product of the original v1 with v2
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;
    int k   = 0;

    while (order--) {
        acc   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
        k++;
    }
    return acc;
}
2665
e6e98234
JR
/**
 * Apply a symmetric window to an int16_t signal.
 *
 * Only the first len/2 window coefficients are read; coefficient i is used
 * for both sample i and its mirror sample len-1-i. Each product is rounded
 * (1 << 14 is added) and shifted right by 15.
 *
 * @param output destination samples
 * @param input  source samples
 * @param window window coefficients (first half only)
 * @param len    number of samples; when len is odd the middle sample is
 *               not written
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const unsigned int half = len >> 1;
    unsigned int head = 0;
    unsigned int tail = len - 1;

    while (head < half) {
        const int16_t coef = window[head];

        output[head] = (MUL16(input[head], coef) + (1 << 14)) >> 15;
        output[tail] = (MUL16(input[tail], coef) + (1 << 14)) >> 15;
        head++;
        tail--;
    }
}
2678
6054cd25
JR
/**
 * Clip every element of src to [min, max] and store the result in dst.
 *
 * The main loop handles eight elements per iteration. Unlike the former
 * do/while form, a length of 0 is a no-op instead of writing eight elements
 * and then wrapping the unsigned counter, and a length that is not a
 * multiple of 8 is finished by a scalar tail loop instead of running past
 * the end of the buffers.
 *
 * NOTE(review): other implementations installed for this operation may
 * still expect a non-zero multiple of 8; callers should keep passing one.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    /* unrolled main loop, eight elements at a time */
    for (; len >= 8; len -= 8) {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
    }

    /* remaining 0..7 elements */
    for (; len > 0; len--)
        *dst++ = av_clip(*src++, min, max);
}
2694
9abc7e0f
MN
/* Fixed-point IDCT constants: 2048*sqrt(2)*cos(k*pi/16), W0 being the plain scale */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * One-dimensional 8-point inverse transform of a row, in place.
 * Results are rounded and scaled down by 8 bits.
 */
static void wmv2_idct_row(short * b)
{
    /* stage 1: rotations */
    const int r1 = W1*b[1] + W7*b[7];
    const int r7 = W7*b[1] - W1*b[7];
    const int r5 = W5*b[5] + W3*b[3];
    const int r3 = W3*b[5] - W5*b[3];
    const int r2 = W2*b[2] + W6*b[6];
    const int r6 = W6*b[2] - W2*b[6];
    const int r0 = W0*b[0] + W0*b[4];
    const int r4 = W0*b[0] - W0*b[4];

    /* stage 2: 181/256 scaling of the mixed odd terms */
    const int d15 = r1 - r5;
    const int d73 = r7 - r3;
    const int m1  = (181*(d15 + d73) + 128) >> 8;
    const int m2  = (181*(d15 - d73) + 128) >> 8;

    /* stage 3: even/odd butterflies with rounding */
    const int rnd   = 1 << 7;
    const int even0 = r0 + r2;
    const int even1 = r4 + r6;
    const int even2 = r4 - r6;
    const int even3 = r0 - r2;
    const int odd0  = r1 + r5;
    const int odd3  = r7 + r3;

    b[0] = (even0 + odd0 + rnd) >> 8;
    b[7] = (even0 - odd0 + rnd) >> 8;
    b[1] = (even1 + m1   + rnd) >> 8;
    b[6] = (even1 - m1   + rnd) >> 8;
    b[2] = (even2 + m2   + rnd) >> 8;
    b[5] = (even2 - m2   + rnd) >> 8;
    b[3] = (even3 + odd3 + rnd) >> 8;
    b[4] = (even3 - odd3 + rnd) >> 8;
}

/**
 * One-dimensional 8-point inverse transform of a column (stride 8), in place.
 * Stage 1 keeps 3 fewer fractional bits than the row pass, and the final
 * results are rounded and scaled down by 14 bits.
 */
static void wmv2_idct_col(short * b)
{
    short * const c0 = b;
    short * const c1 = b + 8*1;
    short * const c2 = b + 8*2;
    short * const c3 = b + 8*3;
    short * const c4 = b + 8*4;
    short * const c5 = b + 8*5;
    short * const c6 = b + 8*6;
    short * const c7 = b + 8*7;

    /* stage 1, with extended precision; the W0 terms are not rounded */
    const int r1 = (W1 * *c1 + W7 * *c7 + 4) >> 3;
    const int r7 = (W7 * *c1 - W1 * *c7 + 4) >> 3;
    const int r5 = (W5 * *c5 + W3 * *c3 + 4) >> 3;
    const int r3 = (W3 * *c5 - W5 * *c3 + 4) >> 3;
    const int r2 = (W2 * *c2 + W6 * *c6 + 4) >> 3;
    const int r6 = (W6 * *c2 - W2 * *c6 + 4) >> 3;
    const int r0 = (W0 * *c0 + W0 * *c4    ) >> 3;
    const int r4 = (W0 * *c0 - W0 * *c4    ) >> 3;

    /* stage 2 */
    const int d15 = r1 - r5;
    const int d73 = r7 - r3;
    const int m1  = (181*(d15 + d73) + 128) >> 8;
    const int m2  = (181*(d15 - d73) + 128) >> 8;

    /* stage 3 */
    const int rnd   = 1 << 13;
    const int even0 = r0 + r2;
    const int even1 = r4 + r6;
    const int even2 = r4 - r6;
    const int even3 = r0 - r2;
    const int odd0  = r1 + r5;
    const int odd3  = r7 + r3;

    *c0 = (even0 + odd0 + rnd) >> 14;
    *c7 = (even0 - odd0 + rnd) >> 14;
    *c1 = (even1 + m1   + rnd) >> 14;
    *c6 = (even1 - m1   + rnd) >> 14;
    *c2 = (even2 + m2   + rnd) >> 14;
    *c5 = (even2 - m2   + rnd) >> 14;
    *c3 = (even3 + odd3 + rnd) >> 14;
    *c4 = (even3 - odd3 + rnd) >> 14;
}

/**
 * In-place 8x8 inverse transform: all eight rows first, then all eight columns.
 *
 * @param block 64 coefficients in row-major order, overwritten with the result
 */
void ff_wmv2_idct_c(short * block){
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8*row);

    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
b0368839
MN
2767/* XXX: those functions should be suppressed ASAP when all IDCTs are
2768 converted */
9abc7e0f
MN
2769static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2770{
2771 ff_wmv2_idct_c(block);
484a337c 2772 ff_put_pixels_clamped_c(block, dest, line_size);
9abc7e0f
MN
2773}
2774static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2775{
2776 ff_wmv2_idct_c(block);
484a337c 2777 ff_add_pixels_clamped_c(block, dest, line_size);
9abc7e0f 2778}
b0368839
MN
2779static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2780{
2781 j_rev_dct (block);
484a337c 2782 ff_put_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2783}
2784static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2785{
2786 j_rev_dct (block);
484a337c 2787 ff_add_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2788}
2789
178fcca8
MN
2790static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
2791{
2792 j_rev_dct4 (block);
2793 put_pixels_clamped4_c(block, dest, line_size);
2794}
2795static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
2796{
2797 j_rev_dct4 (block);
2798 add_pixels_clamped4_c(block, dest, line_size);
2799}
2800
9ca358b9
MN
2801static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
2802{
2803 j_rev_dct2 (block);
2804 put_pixels_clamped2_c(block, dest, line_size);
2805}
2806static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
2807{
2808 j_rev_dct2 (block);
2809 add_pixels_clamped2_c(block, dest, line_size);
2810}
2811
1aa8c57b
MN
2812static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
2813{
55fde95e 2814 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1aa8c57b
MN
2815
2816 dest[0] = cm[(block[0] + 4)>>3];
2817}
2818static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
2819{
55fde95e 2820 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1aa8c57b
MN
2821
2822 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
2823}
2824
d111e41f 2825static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
513fbd8e 2826
59cf08ce 2827/* init static data */
0752cd39 2828av_cold void dsputil_static_init(void)
e0eac44e 2829{
d2975f8d 2830 int i;
e0eac44e 2831
55fde95e 2832 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
59cf08ce 2833 for(i=0;i<MAX_NEG_CROP;i++) {
55fde95e
MR
2834 ff_cropTbl[i] = 0;
2835 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
59cf08ce 2836 }
115329f1 2837
59cf08ce 2838 for(i=0;i<512;i++) {
1d503957 2839 ff_squareTbl[i] = (i - 256) * (i - 256);
59cf08ce 2840 }
115329f1 2841
486497e0 2842 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
59cf08ce 2843}
92ddb692 2844
6dc7d5da
MN
2845int ff_check_alignment(void){
2846 static int did_fail=0;
58d7efdb 2847 LOCAL_ALIGNED_16(int, aligned);
6dc7d5da 2848
d4efacff 2849 if((intptr_t)&aligned & 15){
6dc7d5da 2850 if(!did_fail){
b250f9c6 2851#if HAVE_MMX || HAVE_ALTIVEC
6dc7d5da 2852 av_log(NULL, AV_LOG_ERROR,
c1173617
MR
2853 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2854 "and may be very slow or crash. This is not a bug in libavcodec,\n"
5e4c7ca2 2855 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
6001dad6 2856 "Do not report crashes to Libav developers.\n");
6dc7d5da
MN
2857#endif
2858 did_fail=1;
2859 }
2860 return -1;
2861 }
2862 return 0;
2863}
92ddb692 2864
/**
 * Fill a DSPContext with function pointers.
 *
 * The forward DCT is chosen from avctx->dct_algo, the inverse DCT from
 * avctx->lowres and avctx->idct_algo, and the bit-depth dependent functions
 * from avctx->codec_id and avctx->bits_per_raw_sample. After the C versions
 * are installed, the architecture-specific init functions enabled at build
 * time are called with the same context, and finally idct_permutation[] is
 * built from idct_permutation_type.
 *
 * @param c     context to fill
 * @param avctx codec context whose settings drive the selection
 */
0752cd39 2865av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
59cf08ce
FB
2866{
2867 int i;
de6d9b64 2868
6dc7d5da
MN
2869 ff_check_alignment();
2870
/* forward DCT: selected by avctx->dct_algo, only when encoders are built */
b250f9c6 2871#if CONFIG_ENCODERS
10acc479 2872 if(avctx->dct_algo==FF_DCT_FASTINT) {
b0368839 2873 c->fdct = fdct_ifast;
bb270c08 2874 c->fdct248 = fdct_ifast248;
115329f1 2875 }
10acc479 2876 else if(avctx->dct_algo==FF_DCT_FAAN) {
65e4c8c9 2877 c->fdct = ff_faandct;
bb270c08 2878 c->fdct248 = ff_faandct248;
115329f1 2879 }
10acc479 2880 else {
b0368839 2881 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
bb270c08 2882 c->fdct248 = ff_fdct248_islow;
10acc479 2883 }
b0368839
MN
2884#endif //CONFIG_ENCODERS
2885
/* inverse DCT: lowres 1..3 pick the reduced-size IDCTs, otherwise idct_algo decides */
178fcca8 2886 if(avctx->lowres==1){
49fb20cb 2887 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
0fa8158d
MN
2888 c->idct_put= ff_jref_idct4_put;
2889 c->idct_add= ff_jref_idct4_add;
2890 }else{
19a0729b
OA
2891 if (avctx->codec_id != CODEC_ID_H264) {
2892 c->idct_put= ff_h264_lowres_idct_put_8_c;
2893 c->idct_add= ff_h264_lowres_idct_add_8_c;
2894 } else {
2895 switch (avctx->bits_per_raw_sample) {
2896 case 9:
2897 c->idct_put= ff_h264_lowres_idct_put_9_c;
2898 c->idct_add= ff_h264_lowres_idct_add_9_c;
2899 break;
2900 case 10:
2901 c->idct_put= ff_h264_lowres_idct_put_10_c;
2902 c->idct_add= ff_h264_lowres_idct_add_10_c;
2903 break;
2904 default:
2905 c->idct_put= ff_h264_lowres_idct_put_8_c;
2906 c->idct_add= ff_h264_lowres_idct_add_8_c;
2907 }
2908 }
0fa8158d 2909 }
178fcca8 2910 c->idct = j_rev_dct4;
b0368839 2911 c->idct_permutation_type= FF_NO_IDCT_PERM;
9ca358b9
MN
2912 }else if(avctx->lowres==2){
2913 c->idct_put= ff_jref_idct2_put;
2914 c->idct_add= ff_jref_idct2_add;
2915 c->idct = j_rev_dct2;
2916 c->idct_permutation_type= FF_NO_IDCT_PERM;
1aa8c57b
MN
2917 }else if(avctx->lowres==3){
2918 c->idct_put= ff_jref_idct1_put;
2919 c->idct_add= ff_jref_idct1_add;
2920 c->idct = j_rev_dct1;
2921 c->idct_permutation_type= FF_NO_IDCT_PERM;
178fcca8
MN
2922 }else{
2923 if(avctx->idct_algo==FF_IDCT_INT){
2924 c->idct_put= ff_jref_idct_put;
2925 c->idct_add= ff_jref_idct_add;
2926 c->idct = j_rev_dct;
2927 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
99e5a9d1 2928 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
9b5dc867 2929 avctx->idct_algo==FF_IDCT_VP3){
8b6103da
MN
2930 c->idct_put= ff_vp3_idct_put_c;
2931 c->idct_add= ff_vp3_idct_add_c;
2932 c->idct = ff_vp3_idct_c;
2933 c->idct_permutation_type= FF_NO_IDCT_PERM;
9abc7e0f
MN
2934 }else if(avctx->idct_algo==FF_IDCT_WMV2){
2935 c->idct_put= ff_wmv2_idct_put_c;
2936 c->idct_add= ff_wmv2_idct_add_c;
2937 c->idct = ff_wmv2_idct_c;
2938 c->idct_permutation_type= FF_NO_IDCT_PERM;
6f08c541
MN
2939 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2940 c->idct_put= ff_faanidct_put;
2941 c->idct_add= ff_faanidct_add;
2942 c->idct = ff_faanidct;
2943 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* NOTE(review): the EA branch below sets only idct_put; idct and idct_add are left untouched */
49fb20cb 2944 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
28245435
PR
2945 c->idct_put= ff_ea_idct_put_c;
2946 c->idct_permutation_type= FF_NO_IDCT_PERM;
342c7dfd
KS
2947 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
2948 c->idct = ff_bink_idct_c;
2949 c->idct_add = ff_bink_idct_add_c;
2950 c->idct_put = ff_bink_idct_put_c;
2951 c->idct_permutation_type = FF_NO_IDCT_PERM;
178fcca8 2952 }else{ //accurate/default
59e6f60a
AJ
2953 c->idct_put= ff_simple_idct_put;
2954 c->idct_add= ff_simple_idct_add;
2955 c->idct = ff_simple_idct;
178fcca8
MN
2956 c->idct_permutation_type= FF_NO_IDCT_PERM;
2957 }
b0368839
MN
2958 }
2959
/* C implementations that do not depend on bit depth */
eb4b3dd3
ZK
2960 c->get_pixels = get_pixels_c;
2961 c->diff_pixels = diff_pixels_c;
484a337c
RB
2962 c->put_pixels_clamped = ff_put_pixels_clamped_c;
2963 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
342c7dfd 2964 c->put_pixels_nonclamped = put_pixels_nonclamped_c;
484a337c 2965 c->add_pixels_clamped = ff_add_pixels_clamped_c;
1edbfe19 2966 c->sum_abs_dctelem = sum_abs_dctelem_c;
eb4b3dd3 2967 c->gmc1 = gmc1_c;
703c8195 2968 c->gmc = ff_gmc_c;
eb4b3dd3
ZK
2969 c->pix_sum = pix_sum_c;
2970 c->pix_norm1 = pix_norm1_c;
2971
342c7dfd
KS
2972 c->fill_block_tab[0] = fill_block16_c;
2973 c->fill_block_tab[1] = fill_block8_c;
2974 c->scale_block = scale_block_c;
2975
45553457 2976 /* TODO [0] 16 [1] 8 */
bb198e19
MN
2977 c->pix_abs[0][0] = pix_abs16_c;
2978 c->pix_abs[0][1] = pix_abs16_x2_c;
2979 c->pix_abs[0][2] = pix_abs16_y2_c;
2980 c->pix_abs[0][3] = pix_abs16_xy2_c;
2981 c->pix_abs[1][0] = pix_abs8_c;
2982 c->pix_abs[1][1] = pix_abs8_x2_c;
2983 c->pix_abs[1][2] = pix_abs8_y2_c;
2984 c->pix_abs[1][3] = pix_abs8_xy2_c;
eb4b3dd3 2985
669ac79c
MN
2986 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2987 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2988 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2989 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2990 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2991 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2992 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2993 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2994 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2995
da3b9756
MM
2996 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2997 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2998 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2999 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
3000 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
3001 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
3002 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
3003 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
3004 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
3005
/* dspfunc(PFX, IDX, NUM) fills the 16 entries of c->PFX_pixels_tab[IDX] with PFX<NUM>_mcXY_c */
45553457
ZK
3006#define dspfunc(PFX, IDX, NUM) \
3007 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
3008 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
3009 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
3010 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
3011 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
3012 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
3013 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
3014 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
3015 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
3016 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
3017 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
3018 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
3019 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
3020 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
3021 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
3022 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
3023
3024 dspfunc(put_qpel, 0, 16);
3025 dspfunc(put_no_rnd_qpel, 0, 16);
3026
3027 dspfunc(avg_qpel, 0, 16);
3028 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
3029
3030 dspfunc(put_qpel, 1, 8);
3031 dspfunc(put_no_rnd_qpel, 1, 8);
3032
3033 dspfunc(avg_qpel, 1, 8);
3034 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265 3035
45553457 3036#undef dspfunc
5a6a9e78 3037
/* codec-specific DSP initializers, compiled in only for the enabled decoders */
bf4f19dc
RP
3038#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
3039 ff_mlp_init(c, avctx);
3040#endif
9be6f0d2 3041#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
9abc7e0f
MN
3042 ff_intrax8dsp_init(c,avctx);
3043#endif
b250f9c6 3044#if CONFIG_RV30_DECODER
6beb8b26
KS
3045 ff_rv30dsp_init(c,avctx);
3046#endif
b250f9c6 3047#if CONFIG_RV40_DECODER
2d8a0815
KS
3048 ff_rv40dsp_init(c,avctx);
3049 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
3050 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
3051 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
3052 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
3053#endif
b482e2d1 3054
3d1b1caa 3055 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
1457ab52
MN
3056 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
3057 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
3058 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
3059 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
3060 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
3061 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3062 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
115329f1 3063
/* SET_CMP_FUNC(name) installs name16_c in slot 0 and name8x8_c in slot 1 of c->name */
bb198e19
MN
3064#define SET_CMP_FUNC(name) \
3065 c->name[0]= name ## 16_c;\
3066 c->name[1]= name ## 8x8_c;
115329f1 3067
bb198e19 3068 SET_CMP_FUNC(hadamard8_diff)
622348f9 3069 c->hadamard8_diff[4]= hadamard8_intra16_c;
7fb7f636 3070 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
bb198e19 3071 SET_CMP_FUNC(dct_sad)
0fd6aea1 3072 SET_CMP_FUNC(dct_max)
b250f9c6 3073#if CONFIG_GPL
27c61ac5 3074 SET_CMP_FUNC(dct264_sad)
60900991 3075#endif
bb198e19
MN
3076 c->sad[0]= pix_abs16_c;
3077 c->sad[1]= pix_abs8_c;
3078 c->sse[0]= sse16_c;
3079 c->sse[1]= sse8_c;
26efc54e 3080 c->sse[2]= sse4_c;
bb198e19
MN
3081 SET_CMP_FUNC(quant_psnr)
3082 SET_CMP_FUNC(rd)
3083 SET_CMP_FUNC(bit)
622348f9
MN
3084 c->vsad[0]= vsad16_c;
3085 c->vsad[4]= vsad_intra16_c;
7fb7f636 3086 c->vsad[5]= vsad_intra8_c;
622348f9
MN
3087 c->vsse[0]= vsse16_c;
3088 c->vsse[4]= vsse_intra16_c;
7fb7f636 3089 c->vsse[5]= vsse_intra8_c;
e6a2ac34
MN
3090 c->nsse[0]= nsse16_c;
3091 c->nsse[1]= nsse8_c;
05aec7bb
MR
3092#if CONFIG_DWT
3093 ff_dsputil_init_dwt(c);
3a6fc8fa 3094#endif
26efc54e 3095
59006372
LM
3096 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
3097
11f18faf 3098 c->add_bytes= add_bytes_c;
4a9ca0a2 3099 c->add_bytes_l2= add_bytes_l2_c;
11f18faf 3100 c->diff_bytes= diff_bytes_c;
3daa434a 3101 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
84705403 3102 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
73c6f598
NC
3103 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
3104 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
3d2e8cce 3105 c->bswap_buf= bswap_buf;
381d37fd 3106 c->bswap16_buf = bswap16_buf;
b250f9c6 3107#if CONFIG_PNG_DECODER
4a9ca0a2
LM
3108 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
3109#endif
42251a2a 3110
4052cbf1 3111 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
674eeb5f
AJ
3112 c->h263_h_loop_filter= h263_h_loop_filter_c;
3113 c->h263_v_loop_filter= h263_v_loop_filter_c;
eb75a698 3114 }
115329f1 3115
99e5a9d1 3116 if (CONFIG_VP3_DECODER) {
9971331d
DC
3117 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
3118 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
eb6a6cd7 3119 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
9971331d
DC
3120 }
3121
fdbbf2e0 3122 c->h261_loop_filter= h261_loop_filter_c;
115329f1 3123
364a1797
MN
3124 c->try_8x8basis= try_8x8basis_c;
3125 c->add_8x8basis= add_8x8basis_c;
11f18faf 3126
b250f9c6 3127#if CONFIG_VORBIS_DECODER
2dac4acf
LM
3128 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
3129#endif
b250f9c6 3130#if CONFIG_AC3_DECODER
ac2e5564
LM
3131 c->ac3_downmix = ff_ac3_downmix_c;
3132#endif
eb4825b5
LM
3133 c->vector_fmul = vector_fmul_c;
3134 c->vector_fmul_reverse = vector_fmul_reverse_c;
952e8721 3135 c->vector_fmul_add = vector_fmul_add_c;
80ba1ddb 3136 c->vector_fmul_window = vector_fmul_window_c;
0a68cd87 3137 c->vector_clipf = vector_clipf_c;
88c0536a 3138 c->scalarproduct_int16 = scalarproduct_int16_c;
b1159ad9 3139 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
e6e98234 3140 c->apply_window_int16 = apply_window_int16_c;
6054cd25 3141 c->vector_clip_int32 = vector_clip_int32_c;
53b57211
MR
3142 c->scalarproduct_float = scalarproduct_float_c;
3143 c->butterflies_float = butterflies_float_c;
3144 c->vector_fmul_scalar = vector_fmul_scalar_c;
3145
3146 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
3147 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
3148
3149 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
3150 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
2dac4acf 3151
9686abb8 3152 c->shrink[0]= av_image_copy_plane;
54009d42
MN
3153 c->shrink[1]= ff_shrink22;
3154 c->shrink[2]= ff_shrink44;
3155 c->shrink[3]= ff_shrink88;
3156
513fbd8e
LM
3157 c->prefetch= just_return;
3158
/* clear the 2tap qpel tables; entries still NULL after the arch init below get a fallback */
2833fc46
LM
3159 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3160 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3161
/* FUNC/FUNCC paste the bit depth into a function name: f_<depth> and f_<depth>_c */
19a0729b
OA
3162#undef FUNC
3163#undef FUNCC
3164#define FUNC(f, depth) f ## _ ## depth
3165#define FUNCC(f, depth) f ## _ ## depth ## _c
3166
3167#define dspfunc1(PFX, IDX, NUM, depth)\
3168 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
3169 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3170 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3171 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3172
3173#define dspfunc2(PFX, IDX, NUM, depth)\
3174 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3175 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3176 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3177 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3178 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3179 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3180 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3181 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3182 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3183 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3184 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3185 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3186 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3187 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3188 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3189 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3190
3191
3192#define BIT_DEPTH_FUNCS(depth)\
3193 c->draw_edges = FUNCC(draw_edges , depth);\
3194 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
3195 c->clear_block = FUNCC(clear_block , depth);\
3196 c->clear_blocks = FUNCC(clear_blocks , depth);\
3197 c->add_pixels8 = FUNCC(add_pixels8 , depth);\
3198 c->add_pixels4 = FUNCC(add_pixels4 , depth);\
3199 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3200 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3201\
3202 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3203 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3204 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3205 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3206 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3207 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3208\
3209 dspfunc1(put , 0, 16, depth);\
3210 dspfunc1(put , 1, 8, depth);\
3211 dspfunc1(put , 2, 4, depth);\
3212 dspfunc1(put , 3, 2, depth);\
3213 dspfunc1(put_no_rnd, 0, 16, depth);\
3214 dspfunc1(put_no_rnd, 1, 8, depth);\
3215 dspfunc1(avg , 0, 16, depth);\
3216 dspfunc1(avg , 1, 8, depth);\
3217 dspfunc1(avg , 2, 4, depth);\
3218 dspfunc1(avg , 3, 2, depth);\
3219 dspfunc1(avg_no_rnd, 0, 16, depth);\
3220 dspfunc1(avg_no_rnd, 1, 8, depth);\
3221\
3222 dspfunc2(put_h264_qpel, 0, 16, depth);\
3223 dspfunc2(put_h264_qpel, 1, 8, depth);\
3224 dspfunc2(put_h264_qpel, 2, 4, depth);\
3225 dspfunc2(put_h264_qpel, 3, 2, depth);\
3226 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3227 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3228 dspfunc2(avg_h264_qpel, 2, 4, depth);
3229
/* only H.264 uses the 9/10-bit variants; any other codec or depth falls back to 8-bit */
3230 if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) {
3231 BIT_DEPTH_FUNCS(8)
3232 } else {
3233 switch (avctx->bits_per_raw_sample) {
3234 case 9:
3235 BIT_DEPTH_FUNCS(9)
3236 break;
3237 case 10:
3238 BIT_DEPTH_FUNCS(10)
3239 break;
3240 default:
3241 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
3242 BIT_DEPTH_FUNCS(8)
3243 break;
3244 }
3245 }
3246
3247
/* architecture-specific initializers run after the C defaults and receive the same context */
49fb20cb
AJ
3248 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
3249 if (ARCH_ARM) dsputil_init_arm (c, avctx);
3250 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
3251 if (HAVE_VIS) dsputil_init_vis (c, avctx);
3252 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
3253 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
3254 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
3255 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
3256 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
43f1708f 3257
/* 2tap qpel entries that are still NULL fall back to the h264 qpel functions;
   [0][i] with i up to 63 presumably walks the whole table as one flat array -- TODO confirm its dimensions */
2833fc46
LM
3258 for(i=0; i<64; i++){
3259 if(!c->put_2tap_qpel_pixels_tab[0][i])
3260 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
3261 if(!c->avg_2tap_qpel_pixels_tab[0][i])
3262 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3263 }
3264
/* the first entry of the rv30/rv40 tables reuses the h264 qpel entry of the same size */
eca9e403
MR
3265 c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
3266 c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
3267 c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
3268 c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
3269
3270 c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
3271 c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
3272 c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
3273 c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
3274
/* build idct_permutation[] for the permutation type recorded in the context */
b0368839
MN
3275 switch(c->idct_permutation_type){
3276 case FF_NO_IDCT_PERM:
3277 for(i=0; i<64; i++)
3278 c->idct_permutation[i]= i;
3279 break;
3280 case FF_LIBMPEG2_IDCT_PERM:
3281 for(i=0; i<64; i++)
3282 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
3283 break;
3284 case FF_SIMPLE_IDCT_PERM:
3285 for(i=0; i<64; i++)
3286 c->idct_permutation[i]= simple_mmx_permutation[i];
3287 break;
3288 case FF_TRANSPOSE_IDCT_PERM:
3289 for(i=0; i<64; i++)
3290 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
3291 break;
5773a746
MN
3292 case FF_PARTTRANS_IDCT_PERM:
3293 for(i=0; i<64; i++)
3294 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
3295 break;
0e956ba2
AS
3296 case FF_SSE2_IDCT_PERM:
3297 for(i=0; i<64; i++)
3298 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
3299 break;
b0368839 3300 default:
9b879566 3301 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
b0368839 3302 }
57060b1e 3303}
b0368839 3304