wmv2: move IDCT to its own DSP context.
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
406792e7 3 * Copyright (c) 2000, 2001 Fabrice Bellard
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
de6d9b64 5 *
7b94177e
DB
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7 *
2912e87a 8 * This file is part of Libav.
b78e7197 9 *
2912e87a 10 * Libav is free software; you can redistribute it and/or
ff4ec49e
FB
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
b78e7197 13 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 14 *
2912e87a 15 * Libav is distributed in the hope that it will be useful,
de6d9b64 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
de6d9b64 19 *
ff4ec49e 20 * You should have received a copy of the GNU Lesser General Public
2912e87a 21 * License along with Libav; if not, write to the Free Software
5509bffa 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 23 */
115329f1 24
983e3246 25/**
ba87f080 26 * @file
983e3246
MN
27 * DSP utils
28 */
115329f1 29
737eb597 30#include "libavutil/imgutils.h"
de6d9b64
FB
31#include "avcodec.h"
32#include "dsputil.h"
b0368839 33#include "simple_idct.h"
65e4c8c9 34#include "faandct.h"
6f08c541 35#include "faanidct.h"
199436b9 36#include "mathops.h"
af818f7a
DB
37#include "mpegvideo.h"
38#include "config.h"
5596c60c 39
55fde95e 40uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
1d503957 41uint32_t ff_squareTbl[512] = {0, };
de6d9b64 42
19a0729b
OA
43#define BIT_DEPTH 9
44#include "dsputil_template.c"
45#undef BIT_DEPTH
46
47#define BIT_DEPTH 10
48#include "dsputil_template.c"
49#undef BIT_DEPTH
50
51#define BIT_DEPTH 8
325eefa2
OA
52#include "dsputil_template.c"
53
917f55cc
LM
/* Byte-replicated constants: ~0UL / 0xff has 0x01 in every byte of an
 * unsigned long, so multiplying yields 0x7f (resp. 0x80) in every byte,
 * whatever the width of unsigned long is on the target. */
#define pb_7f ((~0UL / 0xffUL) * 0x7fUL)
#define pb_80 ((~0UL / 0xffUL) * 0x80UL)
469bd7b1 57
/* Zigzag scan order: entry i is the raster index (row * 8 + column) of the
 * i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,   /* scan positions  0.. 7 */
    17, 24, 32, 25, 18, 11,  4,  5,   /* scan positions  8..15 */
    12, 19, 26, 33, 40, 48, 41, 34,   /* scan positions 16..23 */
    27, 20, 13,  6,  7, 14, 21, 28,   /* scan positions 24..31 */
    35, 42, 49, 56, 57, 50, 43, 36,   /* scan positions 32..39 */
    29, 22, 15, 23, 30, 37, 44, 51,   /* scan positions 40..47 */
    58, 59, 52, 45, 38, 31, 39, 46,   /* scan positions 48..55 */
    53, 60, 61, 54, 47, 55, 62, 63    /* scan positions 56..63 */
};
68
10acc479
RS
/* Zigzag scan dedicated to the 2-4-8 IDCT. Note that, unlike the
 * specification, the two fields are interleaved in this table. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 33, 41,   /* scan positions  8..15 */
    18, 26,  3, 11,  4, 12, 19, 27,   /* scan positions 16..23 */
    34, 42, 49, 57, 50, 58, 35, 43,   /* scan positions 24..31 */
    20, 28,  5, 13,  6, 14, 21, 29,   /* scan positions 32..39 */
    36, 44, 51, 59, 52, 60, 37, 45,   /* scan positions 40..47 */
    22, 30,  7, 15, 23, 31, 38, 46,   /* scan positions 48..55 */
    53, 61, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
81
2f349de2 82/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
873c89e2 83DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
2f349de2 84
/* Alternate (horizontally biased) scan order: entry i is the raster index of
 * the i-th coefficient in scan order. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,   /* scan positions  0.. 7 */
    10, 11,  4,  5,  6,  7, 15, 14,   /* scan positions  8..15 */
    13, 12, 19, 18, 24, 25, 32, 33,   /* scan positions 16..23 */
    26, 27, 20, 21, 22, 23, 28, 29,   /* scan positions 24..31 */
    30, 31, 34, 35, 40, 41, 48, 49,   /* scan positions 32..39 */
    42, 43, 36, 37, 38, 39, 44, 45,   /* scan positions 40..47 */
    46, 47, 50, 51, 56, 57, 58, 59,   /* scan positions 48..55 */
    52, 53, 54, 55, 60, 61, 62, 63,   /* scan positions 56..63 */
};
95
/* Alternate (vertically biased) scan order: entry i is the raster index of
 * the i-th coefficient in scan order. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 57, 49,   /* scan positions  8..15 */
    41, 33, 26, 18,  3, 11,  4, 12,   /* scan positions 16..23 */
    19, 27, 34, 42, 50, 58, 35, 43,   /* scan positions 24..31 */
    51, 59, 20, 28,  5, 13,  6, 14,   /* scan positions 32..39 */
    21, 29, 36, 44, 52, 60, 37, 45,   /* scan positions 40..47 */
    53, 61, 22, 30,  7, 15, 23, 31,   /* scan positions 48..55 */
    38, 46, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
106
b0368839
MN
/* Coefficient input permutation expected by simple_idct_mmx; copied verbatim
 * into the IDCT permutation for FF_SIMPLE_IDCT_PERM. */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,   /* indices  0.. 7 */
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,   /* indices  8..15 */
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,   /* indices 16..23 */
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,   /* indices 24..31 */
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,   /* indices 32..39 */
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,   /* indices 40..47 */
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,   /* indices 48..55 */
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,   /* indices 56..63 */
};
118
0e956ba2
AS
/* Column permutation applied within each row of 8 for FF_SSE2_IDCT_PERM. */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5,
    2, 6, 3, 7
};
120
4c79b95c
AJ
121void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
122 int i;
123 int end;
124
125 st->scantable= src_scantable;
126
127 for(i=0; i<64; i++){
128 int j;
129 j = src_scantable[i];
130 st->permutated[i] = permutation[j];
4c79b95c
AJ
131 }
132
133 end=-1;
134 for(i=0; i<64; i++){
135 int j;
136 j = st->permutated[i];
137 if(j>end) end=j;
138 st->raster_end[i]= end;
139 }
140}
141
92fb52d9
RB
142void ff_init_scantable_permutation(uint8_t *idct_permutation,
143 int idct_permutation_type)
144{
145 int i;
146
147 switch(idct_permutation_type){
148 case FF_NO_IDCT_PERM:
149 for(i=0; i<64; i++)
150 idct_permutation[i]= i;
151 break;
152 case FF_LIBMPEG2_IDCT_PERM:
153 for(i=0; i<64; i++)
154 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
155 break;
156 case FF_SIMPLE_IDCT_PERM:
157 for(i=0; i<64; i++)
158 idct_permutation[i]= simple_mmx_permutation[i];
159 break;
160 case FF_TRANSPOSE_IDCT_PERM:
161 for(i=0; i<64; i++)
162 idct_permutation[i]= ((i&7)<<3) | (i>>3);
163 break;
164 case FF_PARTTRANS_IDCT_PERM:
165 for(i=0; i<64; i++)
166 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
167 break;
168 case FF_SSE2_IDCT_PERM:
169 for(i=0; i<64; i++)
170 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
171 break;
172 default:
173 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
174 }
175}
176
/**
 * Sum all pixel values of a 16x16 block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
198
0c1a9eda 199static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
200{
201 int s, i, j;
1d503957 202 uint32_t *sq = ff_squareTbl + 256;
3aa102be
MN
203
204 s = 0;
205 for (i = 0; i < 16; i++) {
bb270c08 206 for (j = 0; j < 16; j += 8) {
e463f0e9
DB
207#if 0
208 s += sq[pix[0]];
209 s += sq[pix[1]];
210 s += sq[pix[2]];
211 s += sq[pix[3]];
212 s += sq[pix[4]];
213 s += sq[pix[5]];
214 s += sq[pix[6]];
215 s += sq[pix[7]];
216#else
d9a9f50a 217#if HAVE_FAST_64BIT
bb270c08
DB
218 register uint64_t x=*(uint64_t*)pix;
219 s += sq[x&0xff];
220 s += sq[(x>>8)&0xff];
221 s += sq[(x>>16)&0xff];
222 s += sq[(x>>24)&0xff];
2a006cd3
FL
223 s += sq[(x>>32)&0xff];
224 s += sq[(x>>40)&0xff];
225 s += sq[(x>>48)&0xff];
226 s += sq[(x>>56)&0xff];
227#else
bb270c08
DB
228 register uint32_t x=*(uint32_t*)pix;
229 s += sq[x&0xff];
230 s += sq[(x>>8)&0xff];
231 s += sq[(x>>16)&0xff];
232 s += sq[(x>>24)&0xff];
2a006cd3
FL
233 x=*(uint32_t*)(pix+4);
234 s += sq[x&0xff];
235 s += sq[(x>>8)&0xff];
236 s += sq[(x>>16)&0xff];
237 s += sq[(x>>24)&0xff];
238#endif
e463f0e9 239#endif
bb270c08
DB
240 pix += 8;
241 }
242 pix += line_size - 16;
3aa102be
MN
243 }
244 return s;
245}
246
/**
 * Byte-swap w 32-bit words from src into dst, in ascending index order.
 *
 * @param dst destination words
 * @param src source words
 * @param w   number of 32-bit words; nothing is written when w <= 0
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int idx;

    for (idx = 0; idx < w; idx++)
        dst[idx] = av_bswap32(src[idx]);
}
3aa102be 264
381d37fd
MR
/**
 * Byte-swap len 16-bit words from src into dst.
 *
 * @param dst destination words
 * @param src source words
 * @param len number of 16-bit words to convert
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    for (; len != 0; len--) {
        *dst = av_bswap16(*src);
        dst++;
        src++;
    }
}
270
26efc54e
MN
271static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
272{
273 int s, i;
1d503957 274 uint32_t *sq = ff_squareTbl + 256;
26efc54e
MN
275
276 s = 0;
277 for (i = 0; i < h; i++) {
278 s += sq[pix1[0] - pix2[0]];
279 s += sq[pix1[1] - pix2[1]];
280 s += sq[pix1[2] - pix2[2]];
281 s += sq[pix1[3] - pix2[3]];
282 pix1 += line_size;
283 pix2 += line_size;
284 }
285 return s;
286}
287
bb198e19 288static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
289{
290 int s, i;
1d503957 291 uint32_t *sq = ff_squareTbl + 256;
1457ab52
MN
292
293 s = 0;
bb198e19 294 for (i = 0; i < h; i++) {
1457ab52
MN
295 s += sq[pix1[0] - pix2[0]];
296 s += sq[pix1[1] - pix2[1]];
297 s += sq[pix1[2] - pix2[2]];
298 s += sq[pix1[3] - pix2[3]];
299 s += sq[pix1[4] - pix2[4]];
300 s += sq[pix1[5] - pix2[5]];
301 s += sq[pix1[6] - pix2[6]];
302 s += sq[pix1[7] - pix2[7]];
303 pix1 += line_size;
304 pix2 += line_size;
305 }
306 return s;
307}
308
bb198e19 309static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 310{
6b026927 311 int s, i;
1d503957 312 uint32_t *sq = ff_squareTbl + 256;
9c76bd48
BF
313
314 s = 0;
bb198e19 315 for (i = 0; i < h; i++) {
6b026927
FH
316 s += sq[pix1[ 0] - pix2[ 0]];
317 s += sq[pix1[ 1] - pix2[ 1]];
318 s += sq[pix1[ 2] - pix2[ 2]];
319 s += sq[pix1[ 3] - pix2[ 3]];
320 s += sq[pix1[ 4] - pix2[ 4]];
321 s += sq[pix1[ 5] - pix2[ 5]];
322 s += sq[pix1[ 6] - pix2[ 6]];
323 s += sq[pix1[ 7] - pix2[ 7]];
324 s += sq[pix1[ 8] - pix2[ 8]];
325 s += sq[pix1[ 9] - pix2[ 9]];
326 s += sq[pix1[10] - pix2[10]];
327 s += sq[pix1[11] - pix2[11]];
328 s += sq[pix1[12] - pix2[12]];
329 s += sq[pix1[13] - pix2[13]];
330 s += sq[pix1[14] - pix2[14]];
331 s += sq[pix1[15] - pix2[15]];
2a006cd3 332
6b026927
FH
333 pix1 += line_size;
334 pix2 += line_size;
9c76bd48
BF
335 }
336 return s;
337}
338
0c1a9eda 339static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
bb270c08 340 const uint8_t *s2, int stride){
9dbcbd92
MN
341 int i;
342
343 /* read the pixels */
9dbcbd92 344 for(i=0;i<8;i++) {
c13e1abd
FH
345 block[0] = s1[0] - s2[0];
346 block[1] = s1[1] - s2[1];
347 block[2] = s1[2] - s2[2];
348 block[3] = s1[3] - s2[3];
349 block[4] = s1[4] - s2[4];
350 block[5] = s1[5] - s2[5];
351 block[6] = s1[6] - s2[6];
352 block[7] = s1[7] - s2[7];
9dbcbd92
MN
353 s1 += stride;
354 s2 += stride;
c13e1abd 355 block += 8;
9dbcbd92
MN
356 }
357}
358
359
dbc9f84e
MR
360static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
361 int line_size)
de6d9b64 362{
de6d9b64 363 int i;
115329f1 364
de6d9b64 365 /* read the pixels */
de6d9b64 366 for(i=0;i<8;i++) {
c23acbae
RB
367 pixels[0] = av_clip_uint8(block[0]);
368 pixels[1] = av_clip_uint8(block[1]);
369 pixels[2] = av_clip_uint8(block[2]);
370 pixels[3] = av_clip_uint8(block[3]);
371 pixels[4] = av_clip_uint8(block[4]);
372 pixels[5] = av_clip_uint8(block[5]);
373 pixels[6] = av_clip_uint8(block[6]);
374 pixels[7] = av_clip_uint8(block[7]);
c13e1abd
FH
375
376 pixels += line_size;
377 block += 8;
de6d9b64
FB
378 }
379}
380
dbc9f84e
MR
381static void put_signed_pixels_clamped_c(const DCTELEM *block,
382 uint8_t *restrict pixels,
383 int line_size)
f9ed9d85
MM
384{
385 int i, j;
386
387 for (i = 0; i < 8; i++) {
388 for (j = 0; j < 8; j++) {
389 if (*block < -128)
390 *pixels = 0;
391 else if (*block > 127)
392 *pixels = 255;
393 else
394 *pixels = (uint8_t)(*block + 128);
395 block++;
396 pixels++;
397 }
398 pixels += (line_size - 8);
399 }
400}
401
dbc9f84e
MR
402static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
403 int line_size)
de6d9b64 404{
de6d9b64 405 int i;
115329f1 406
de6d9b64 407 /* read the pixels */
de6d9b64 408 for(i=0;i<8;i++) {
c23acbae
RB
409 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
410 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
411 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
412 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
413 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
414 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
415 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
416 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
c13e1abd
FH
417 pixels += line_size;
418 block += 8;
de6d9b64
FB
419 }
420}
178fcca8 421
1edbfe19
LM
422static int sum_abs_dctelem_c(DCTELEM *block)
423{
424 int sum=0, i;
425 for(i=0; i<64; i++)
426 sum+= FFABS(block[i]);
427 return sum;
428}
429
342c7dfd
KS
/**
 * Set a 16-byte-wide, h-row block to a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte written to every position
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 16);
        row += line_size;
    }
}
439
/**
 * Set an 8-byte-wide, h-row block to a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte written to every position
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 8);
        row += line_size;
    }
}
449
de6d9b64
FB
/* Rounded integer averages of two and of four values. Every argument is
 * parenthesised so that arguments containing operators of lower precedence
 * than '+' (shifts, bitwise operators, ?:) are averaged as written. Each
 * argument is evaluated exactly once. */
#define avg2(a,b) (((a) + (b) + 1) >> 1)
#define avg4(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
452
/**
 * Bilinear interpolation of an 8-pixel-wide, h-row block with a fixed
 * 1/16-pel offset.
 *
 * Each output is (A*tl + B*tr + C*bl + D*br + rounder) >> 8, where the four
 * weights are products of x16, 16 - x16, y16 and 16 - y16.
 *
 * @param dst     destination block
 * @param src     source; columns 0..8 of rows 0..h are read
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows
 * @param x16     horizontal weight of the right-hand neighbour
 * @param y16     vertical weight of the neighbour below
 * @param rounder value added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = x16        * (16 - y16);
    const int w_bl = (16 - x16) * y16;
    const int w_br = x16        * y16;
    int row, col;

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < 8; col++)
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
475
/**
 * Motion compensation of an 8-pixel-wide, h-row block with a linearly varying
 * source position.
 *
 * The source position of output pixel (x, y) starts at (ox, oy), advances by
 * (dxx, dyx) per column and by (dxy, dyy) per row. After dropping the low 16
 * bits, the low `shift` bits are the sub-pixel fraction and the rest is the
 * integer position. Positions inside the picture are interpolated bilinearly;
 * positions outside are clipped, and interpolation is then done only along
 * the axis that is still inside, if any.
 *
 * @param dst    destination block
 * @param src    source picture, top-left pixel
 * @param stride row stride in bytes, shared by dst and src
 * @param h      number of rows to produce
 * @param shift  number of sub-pixel bits
 * @param r      value added before the final shift by 2 * shift
 * @param width  source width; interpolation needs src_x < width - 1
 * @param height source height; interpolation needs src_y < height - 1
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s        = 1 << shift;
    const int last_col = width  - 1;
    const int last_row = height - 1;
    int row;

    for (row = 0; row < h; row++) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) {
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & (s - 1);
            const int frac_y = src_y & (s - 1);
            int x_inside, y_inside, index;

            src_x >>= shift;
            src_y >>= shift;

            /* the unsigned compare also rejects negative coordinates */
            x_inside = (unsigned)src_x < (unsigned)last_col;
            y_inside = (unsigned)src_y < (unsigned)last_row;

            if (x_inside && y_inside) {
                /* full bilinear interpolation */
                index = src_x + src_y * stride;
                out[col] = ((src[index             ] * (s - frac_x) +
                             src[index          + 1] *      frac_x) * (s - frac_y) +
                            (src[index + stride    ] * (s - frac_x) +
                             src[index + stride + 1] *      frac_x) *      frac_y  +
                            r) >> (shift * 2);
            } else if (x_inside) {
                /* row clipped: horizontal interpolation only */
                index = src_x + av_clip(src_y, 0, last_row) * stride;
                out[col] = ((src[index    ] * (s - frac_x) +
                             src[index + 1] *      frac_x) * s +
                            r) >> (shift * 2);
            } else if (y_inside) {
                /* column clipped: vertical interpolation only */
                index = av_clip(src_x, 0, last_col) + src_y * stride;
                out[col] = ((src[index         ] * (s - frac_y) +
                             src[index + stride] *      frac_y) * s +
                            r) >> (shift * 2);
            } else {
                /* both clipped: copy the nearest pixel */
                index = av_clip(src_x, 0, last_col) + av_clip(src_y, 0, last_row) * stride;
                out[col] = src[index];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
533
/**
 * Full-pel copy: dispatch to the put_pixelsN_8_c routine matching the block
 * width. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
542
/**
 * Horizontal interpolation with weights 2:1 (current : right neighbour).
 * 683 / 2048 approximates 1/3, so each output is about (2*a + b + 1) / 3.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + 1] + 1;

            dst[col] = (683 * weighted) >> 11;
        }
    }
}
553
/**
 * Horizontal interpolation with weights 1:2 (current : right neighbour).
 * 683 / 2048 approximates 1/3, so each output is about (a + 2*b + 1) / 3.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + 1] + 1;

            dst[col] = (683 * weighted) >> 11;
        }
    }
}
115329f1 564
669ac79c
MN
/**
 * Vertical interpolation with weights 2:1 (current : pixel below).
 * 683 / 2048 approximates 1/3, so each output is about (2*a + c + 1) / 3.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + stride] + 1;

            dst[col] = (683 * weighted) >> 11;
        }
    }
}
115329f1 575
669ac79c
MN
/**
 * Two-dimensional interpolation with weights 4:3:3:2 over the current pixel,
 * its right neighbour, the pixel below and the pixel below-right.
 * 2731 / 32768 approximates 1/12, the reciprocal of the weight sum.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = 4 * src[col]          + 3 * src[col + 1] +
                                 3 * src[col + stride] + 2 * src[col + stride + 1] + 6;

            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
586
/**
 * Two-dimensional interpolation with weights 3:2:4:3 over the current pixel,
 * its right neighbour, the pixel below and the pixel below-right.
 * 2731 / 32768 approximates 1/12, the reciprocal of the weight sum.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col]          + 2 * src[col + 1] +
                                 4 * src[col + stride] + 3 * src[col + stride + 1] + 6;

            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
597
/**
 * Vertical interpolation with weights 1:2 (current : pixel below).
 * 683 / 2048 approximates 1/3, so each output is about (a + 2*c + 1) / 3.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + stride] + 1;

            dst[col] = (683 * weighted) >> 11;
        }
    }
}
608
/**
 * Two-dimensional interpolation with weights 3:4:2:3 over the current pixel,
 * its right neighbour, the pixel below and the pixel below-right.
 * 2731 / 32768 approximates 1/12, the reciprocal of the weight sum.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col]          + 4 * src[col + 1] +
                                 2 * src[col + stride] + 3 * src[col + stride + 1] + 6;

            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
619
/**
 * Two-dimensional interpolation with weights 2:3:3:4 over the current pixel,
 * its right neighbour, the pixel below and the pixel below-right.
 * 2731 / 32768 approximates 1/12, the reciprocal of the weight sum.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col]          + 3 * src[col + 1] +
                                 3 * src[col + stride] + 4 * src[col + stride + 1] + 6;

            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
da3b9756
MM
630
/**
 * Full-pel average: dispatch to the avg_pixelsN_8_c routine matching the
 * block width. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
639
/**
 * Horizontal 2:1 interpolation (current : right neighbour, scaled by
 * 683 / 2048, about 1/3), then rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
650
/**
 * Horizontal 1:2 interpolation (current : right neighbour, scaled by
 * 683 / 2048, about 1/3), then rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
115329f1 661
da3b9756
MM
/**
 * Vertical 2:1 interpolation (current : pixel below, scaled by 683 / 2048,
 * about 1/3), then rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
115329f1 672
da3b9756
MM
/**
 * Two-dimensional 4:3:3:2 interpolation (current, right, below, below-right;
 * scaled by 2731 / 32768, about 1/12), then rounded average with the pixel
 * already in dst.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731 * (4 * src[col]          + 3 * src[col + 1] +
                                        3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
683
/**
 * Two-dimensional 3:2:4:3 interpolation (current, right, below, below-right;
 * scaled by 2731 / 32768, about 1/12), then rounded average with the pixel
 * already in dst.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731 * (3 * src[col]          + 2 * src[col + 1] +
                                        4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
694
/**
 * Vertical 1:2 interpolation (current : pixel below, scaled by 683 / 2048,
 * about 1/3), then rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
705
/**
 * Two-dimensional 3:4:2:3 interpolation (current, right, below, below-right;
 * scaled by 2731 / 32768, about 1/12), then rounded average with the pixel
 * already in dst.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731 * (3 * src[col]          + 4 * src[col + 1] +
                                        2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
716
/**
 * Two-dimensional 2:3:3:4 interpolation (current, right, below, below-right;
 * scaled by 2731 / 32768, about 1/12), then rounded average with the pixel
 * already in dst.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731 * (2 * src[col]          + 3 * src[col + 1] +
                                        3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
669ac79c 727
b3184779 728#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda 729static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 730 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
731 int i;\
732 for(i=0; i<h; i++)\
733 {\
734 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
735 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
736 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
737 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
738 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
739 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
740 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
741 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
742 dst+=dstStride;\
743 src+=srcStride;\
744 }\
44eb4951
MN
745}\
746\
0c1a9eda 747static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 748 const int w=8;\
55fde95e 749 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
750 int i;\
751 for(i=0; i<w; i++)\
752 {\
753 const int src0= src[0*srcStride];\
754 const int src1= src[1*srcStride];\
755 const int src2= src[2*srcStride];\
756 const int src3= src[3*srcStride];\
757 const int src4= src[4*srcStride];\
758 const int src5= src[5*srcStride];\
759 const int src6= src[6*srcStride];\
760 const int src7= src[7*srcStride];\
761 const int src8= src[8*srcStride];\
762 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
763 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
764 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
765 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
766 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
767 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
768 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
769 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
770 dst++;\
771 src++;\
772 }\
773}\
774\
0c1a9eda 775static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 776 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 777 int i;\
826f429a 778 \
b3184779
MN
779 for(i=0; i<h; i++)\
780 {\
781 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
782 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
783 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
784 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
785 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
786 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
787 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
788 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
789 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
790 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
791 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
792 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
793 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
794 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
795 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
796 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
797 dst+=dstStride;\
798 src+=srcStride;\
799 }\
800}\
801\
0c1a9eda 802static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
55fde95e 803 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 804 int i;\
826f429a 805 const int w=16;\
b3184779
MN
806 for(i=0; i<w; i++)\
807 {\
808 const int src0= src[0*srcStride];\
809 const int src1= src[1*srcStride];\
810 const int src2= src[2*srcStride];\
811 const int src3= src[3*srcStride];\
812 const int src4= src[4*srcStride];\
813 const int src5= src[5*srcStride];\
814 const int src6= src[6*srcStride];\
815 const int src7= src[7*srcStride];\
816 const int src8= src[8*srcStride];\
817 const int src9= src[9*srcStride];\
818 const int src10= src[10*srcStride];\
819 const int src11= src[11*srcStride];\
820 const int src12= src[12*srcStride];\
821 const int src13= src[13*srcStride];\
822 const int src14= src[14*srcStride];\
823 const int src15= src[15*srcStride];\
824 const int src16= src[16*srcStride];\
825 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
826 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
827 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
828 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
829 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
830 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
831 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
832 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
833 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
834 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
835 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
836 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
837 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
838 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
839 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
840 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
841 dst++;\
842 src++;\
843 }\
844}\
845\
0c1a9eda
ZK
846static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
847 uint8_t half[64];\
b3184779 848 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
19a0729b 849 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
850}\
851\
0c1a9eda 852static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 853 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
854}\
855\
0c1a9eda
ZK
856static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
857 uint8_t half[64];\
b3184779 858 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
19a0729b 859 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
860}\
861\
0c1a9eda
ZK
862static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
863 uint8_t full[16*9];\
864 uint8_t half[64];\
b3184779 865 copy_block9(full, src, 16, stride, 9);\
db794953 866 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
19a0729b 867 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
868}\
869\
0c1a9eda
ZK
870static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
871 uint8_t full[16*9];\
b3184779 872 copy_block9(full, src, 16, stride, 9);\
db794953 873 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
874}\
875\
0c1a9eda
ZK
876static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
877 uint8_t full[16*9];\
878 uint8_t half[64];\
b3184779 879 copy_block9(full, src, 16, stride, 9);\
db794953 880 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
19a0729b 881 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 882}\
0c1a9eda
ZK
883void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
884 uint8_t full[16*9];\
885 uint8_t halfH[72];\
886 uint8_t halfV[64];\
887 uint8_t halfHV[64];\
b3184779
MN
888 copy_block9(full, src, 16, stride, 9);\
889 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
890 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
891 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 892 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 893}\
0c1a9eda
ZK
894static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
895 uint8_t full[16*9];\
896 uint8_t halfH[72];\
897 uint8_t halfHV[64];\
db794953
MN
898 copy_block9(full, src, 16, stride, 9);\
899 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 900 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953 901 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 902 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
db794953 903}\
0c1a9eda
ZK
904void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
905 uint8_t full[16*9];\
906 uint8_t halfH[72];\
907 uint8_t halfV[64];\
908 uint8_t halfHV[64];\
b3184779
MN
909 copy_block9(full, src, 16, stride, 9);\
910 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
911 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
912 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 913 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 914}\
0c1a9eda
ZK
915static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
916 uint8_t full[16*9];\
917 uint8_t halfH[72];\
918 uint8_t halfHV[64];\
db794953
MN
919 copy_block9(full, src, 16, stride, 9);\
920 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 921 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953 922 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 923 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
db794953 924}\
0c1a9eda
ZK
925void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
926 uint8_t full[16*9];\
927 uint8_t halfH[72];\
928 uint8_t halfV[64];\
929 uint8_t halfHV[64];\
b3184779
MN
930 copy_block9(full, src, 16, stride, 9);\
931 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
932 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
933 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 934 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 935}\
0c1a9eda
ZK
936static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
937 uint8_t full[16*9];\
938 uint8_t halfH[72];\
939 uint8_t halfHV[64];\
db794953
MN
940 copy_block9(full, src, 16, stride, 9);\
941 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 942 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953 943 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 944 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
db794953 945}\
0c1a9eda
ZK
946void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
947 uint8_t full[16*9];\
948 uint8_t halfH[72];\
949 uint8_t halfV[64];\
950 uint8_t halfHV[64];\
b3184779
MN
951 copy_block9(full, src, 16, stride, 9);\
952 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
953 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
954 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 955 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 956}\
0c1a9eda
ZK
957static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
958 uint8_t full[16*9];\
959 uint8_t halfH[72];\
960 uint8_t halfHV[64];\
db794953
MN
961 copy_block9(full, src, 16, stride, 9);\
962 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 963 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953 964 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 965 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
db794953 966}\
0c1a9eda
ZK
967static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
968 uint8_t halfH[72];\
969 uint8_t halfHV[64];\
b3184779 970 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 971 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 972 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 973}\
0c1a9eda
ZK
974static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
975 uint8_t halfH[72];\
976 uint8_t halfHV[64];\
b3184779 977 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 978 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 979 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 980}\
0c1a9eda
ZK
981void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
982 uint8_t full[16*9];\
983 uint8_t halfH[72];\
984 uint8_t halfV[64];\
985 uint8_t halfHV[64];\
b3184779
MN
986 copy_block9(full, src, 16, stride, 9);\
987 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
988 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
989 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 990 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 991}\
0c1a9eda
ZK
992static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
993 uint8_t full[16*9];\
994 uint8_t halfH[72];\
db794953
MN
995 copy_block9(full, src, 16, stride, 9);\
996 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 997 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
db794953
MN
998 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
999}\
0c1a9eda
ZK
1000void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1001 uint8_t full[16*9];\
1002 uint8_t halfH[72];\
1003 uint8_t halfV[64];\
1004 uint8_t halfHV[64];\
b3184779
MN
1005 copy_block9(full, src, 16, stride, 9);\
1006 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1007 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1008 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
19a0729b 1009 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 1010}\
0c1a9eda
ZK
1011static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1012 uint8_t full[16*9];\
1013 uint8_t halfH[72];\
db794953
MN
1014 copy_block9(full, src, 16, stride, 9);\
1015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
19a0729b 1016 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
db794953
MN
1017 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1018}\
0c1a9eda
ZK
1019static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1020 uint8_t halfH[72];\
b3184779 1021 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 1022 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 1023}\
b3184779 1024\
0c1a9eda
ZK
1025static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1026 uint8_t half[256];\
b3184779 1027 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
19a0729b 1028 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
b3184779
MN
1029}\
1030\
0c1a9eda 1031static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1032 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 1033}\
b3184779 1034\
0c1a9eda
ZK
1035static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1036 uint8_t half[256];\
b3184779 1037 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
19a0729b 1038 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
b3184779
MN
1039}\
1040\
0c1a9eda
ZK
1041static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1042 uint8_t full[24*17];\
1043 uint8_t half[256];\
b3184779 1044 copy_block17(full, src, 24, stride, 17);\
826f429a 1045 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
19a0729b 1046 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
b3184779
MN
1047}\
1048\
0c1a9eda
ZK
1049static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1050 uint8_t full[24*17];\
b3184779 1051 copy_block17(full, src, 24, stride, 17);\
826f429a 1052 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
1053}\
1054\
0c1a9eda
ZK
1055static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1056 uint8_t full[24*17];\
1057 uint8_t half[256];\
b3184779 1058 copy_block17(full, src, 24, stride, 17);\
826f429a 1059 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
19a0729b 1060 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
b3184779 1061}\
0c1a9eda
ZK
1062void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1063 uint8_t full[24*17];\
1064 uint8_t halfH[272];\
1065 uint8_t halfV[256];\
1066 uint8_t halfHV[256];\
b3184779
MN
1067 copy_block17(full, src, 24, stride, 17);\
1068 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1069 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1070 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1071 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1072}\
0c1a9eda
ZK
1073static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1074 uint8_t full[24*17];\
1075 uint8_t halfH[272];\
1076 uint8_t halfHV[256];\
db794953
MN
1077 copy_block17(full, src, 24, stride, 17);\
1078 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1079 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953 1080 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1081 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
db794953 1082}\
0c1a9eda
ZK
1083void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1084 uint8_t full[24*17];\
1085 uint8_t halfH[272];\
1086 uint8_t halfV[256];\
1087 uint8_t halfHV[256];\
b3184779
MN
1088 copy_block17(full, src, 24, stride, 17);\
1089 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1090 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1091 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1092 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1093}\
0c1a9eda
ZK
1094static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1095 uint8_t full[24*17];\
1096 uint8_t halfH[272];\
1097 uint8_t halfHV[256];\
db794953
MN
1098 copy_block17(full, src, 24, stride, 17);\
1099 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1100 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953 1101 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1102 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
db794953 1103}\
0c1a9eda
ZK
1104void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1105 uint8_t full[24*17];\
1106 uint8_t halfH[272];\
1107 uint8_t halfV[256];\
1108 uint8_t halfHV[256];\
b3184779
MN
1109 copy_block17(full, src, 24, stride, 17);\
1110 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1111 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1112 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1113 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1114}\
0c1a9eda
ZK
1115static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1116 uint8_t full[24*17];\
1117 uint8_t halfH[272];\
1118 uint8_t halfHV[256];\
db794953
MN
1119 copy_block17(full, src, 24, stride, 17);\
1120 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1121 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953 1122 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1123 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
db794953 1124}\
0c1a9eda
ZK
1125void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1126 uint8_t full[24*17];\
1127 uint8_t halfH[272];\
1128 uint8_t halfV[256];\
1129 uint8_t halfHV[256];\
b3184779
MN
1130 copy_block17(full, src, 24, stride, 17);\
1131 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
1132 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1133 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1134 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
b3184779 1135}\
0c1a9eda
ZK
1136static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1137 uint8_t full[24*17];\
1138 uint8_t halfH[272];\
1139 uint8_t halfHV[256];\
db794953
MN
1140 copy_block17(full, src, 24, stride, 17);\
1141 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1142 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953 1143 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1144 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
db794953 1145}\
0c1a9eda
ZK
1146static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1147 uint8_t halfH[272];\
1148 uint8_t halfHV[256];\
b3184779 1149 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1150 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1151 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
b3184779 1152}\
0c1a9eda
ZK
1153static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1154 uint8_t halfH[272];\
1155 uint8_t halfHV[256];\
b3184779 1156 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1157 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1158 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
b3184779 1159}\
0c1a9eda
ZK
1160void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1161 uint8_t full[24*17];\
1162 uint8_t halfH[272];\
1163 uint8_t halfV[256];\
1164 uint8_t halfHV[256];\
b3184779
MN
1165 copy_block17(full, src, 24, stride, 17);\
1166 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1167 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1168 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1169 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
b3184779 1170}\
0c1a9eda
ZK
1171static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1172 uint8_t full[24*17];\
1173 uint8_t halfH[272];\
db794953
MN
1174 copy_block17(full, src, 24, stride, 17);\
1175 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1176 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
db794953
MN
1177 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1178}\
0c1a9eda
ZK
1179void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1180 uint8_t full[24*17];\
1181 uint8_t halfH[272];\
1182 uint8_t halfV[256];\
1183 uint8_t halfHV[256];\
b3184779
MN
1184 copy_block17(full, src, 24, stride, 17);\
1185 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
1186 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1187 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
19a0729b 1188 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
b3184779 1189}\
0c1a9eda
ZK
1190static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1191 uint8_t full[24*17];\
1192 uint8_t halfH[272];\
db794953
MN
1193 copy_block17(full, src, 24, stride, 17);\
1194 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
19a0729b 1195 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
db794953
MN
1196 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1197}\
0c1a9eda
ZK
1198static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1199 uint8_t halfH[272];\
b3184779 1200 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 1201 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 1202}
44eb4951 1203
b3184779
MN
1204#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1205#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1206#define op_put(a, b) a = cm[((b) + 16)>>5]
1207#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1208
1209QPEL_MC(0, put_ , _ , op_put)
1210QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1211QPEL_MC(0, avg_ , _ , op_avg)
1212//QPEL_MC(1, avg_no_rnd , _ , op_avg)
1213#undef op_avg
1214#undef op_avg_no_rnd
1215#undef op_put
1216#undef op_put_no_rnd
44eb4951 1217
3d1b1caa
MR
/*
 * The mc00 qpel entry points are plain aliases for existing
 * ff_{put,avg}_pixels* functions.
 */
/* 8x8 */
#define put_qpel8_mc00_c         ff_put_pixels8x8_c
#define avg_qpel8_mc00_c         ff_avg_pixels8x8_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
/* 16x16 */
#define put_qpel16_mc00_c        ff_put_pixels16x16_c
#define avg_qpel16_mc00_c        ff_avg_pixels16x16_c
/* NOTE(review): this alias alone targets the _8_c name; confirm it is intended. */
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
3d1b1caa 1224
1457ab52 1225static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
55fde95e 1226 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1227 int i;
1228
1229 for(i=0; i<h; i++){
1230 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1231 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1232 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1233 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1234 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1235 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1236 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1237 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1238 dst+=dstStride;
115329f1 1239 src+=srcStride;
1457ab52
MN
1240 }
1241}
1242
#if CONFIG_RV40_DECODER
/*
 * RV40 (3,3) quarter-pel positions: each entry point simply forwards to the
 * matching put/avg *_xy2 routine with the block height filled in.
 */

/** 16x16 put variant. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}

/** 16x16 avg variant. */
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}

/** 8x8 put variant. */
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}

/** 8x8 avg variant. */
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1257
1457ab52 1258static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
55fde95e 1259 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
1260 int i;
1261
1262 for(i=0; i<w; i++){
1263 const int src_1= src[ -srcStride];
1264 const int src0 = src[0 ];
1265 const int src1 = src[ srcStride];
1266 const int src2 = src[2*srcStride];
1267 const int src3 = src[3*srcStride];
1268 const int src4 = src[4*srcStride];
1269 const int src5 = src[5*srcStride];
1270 const int src6 = src[6*srcStride];
1271 const int src7 = src[7*srcStride];
1272 const int src8 = src[8*srcStride];
1273 const int src9 = src[9*srcStride];
1274 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1275 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1276 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1277 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1278 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1279 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1280 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1281 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1282 src++;
1283 dst++;
1284 }
1285}
1286
1457ab52
MN
/**
 * 8x8 mspel position (1,0): the horizontally filtered block is combined with
 * the unshifted source by put_pixels8_l2_8 (presumably an average of the two).
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8 * 8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, filtered, stride, stride, 8, 8);
}
1292
/**
 * 8x8 mspel position (2,0): the horizontal filter output is written straight
 * to dst, 8 rows, using the same stride for source and destination.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1296
/**
 * 8x8 mspel position (3,0): the horizontally filtered block is combined with
 * the source shifted one pixel to the right (src + 1) by put_pixels8_l2_8
 * (presumably an average of the two).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8 * 8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_8(dst, &src[1], filtered, stride, stride, 8, 8);
}
1302
/**
 * 8x8 mspel position (0,2): the vertical filter output is written straight
 * to dst, 8 columns, using the same stride for source and destination.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1306
/**
 * 8x8 mspel position (1,2): combines the vertically filtered source with the
 * horizontally-then-vertically filtered source via put_pixels8_l2_8
 * (presumably an average of the two).
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[8 * 11];      /* 11 rows, starting one row above src */
    uint8_t vert[8 * 8];
    uint8_t both[8 * 8];

    wmv2_mspel8_h_lowpass(horiz, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vert, src, 8, stride, 8);
    /* &horiz[8] is the row aligned with src; the row before it feeds the top tap */
    wmv2_mspel8_v_lowpass(both, &horiz[8], 8, 8, 8);
    put_pixels8_l2_8(dst, vert, both, stride, 8, 8, 8);
}
/**
 * 8x8 mspel position (3,2): like the (1,2) case, but the purely vertical
 * filter runs on the source shifted one pixel to the right (src + 1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[8 * 11];      /* 11 rows, starting one row above src */
    uint8_t vert[8 * 8];
    uint8_t both[8 * 8];

    wmv2_mspel8_h_lowpass(horiz, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vert, &src[1], 8, stride, 8);
    /* &horiz[8] is the row aligned with src; the row before it feeds the top tap */
    wmv2_mspel8_v_lowpass(both, &horiz[8], 8, 8, 8);
    put_pixels8_l2_8(dst, vert, both, stride, 8, 8, 8);
}
/**
 * 8x8 mspel position (2,2): horizontal filter into a temporary, then the
 * vertical filter from that temporary directly into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[8 * 11];      /* 11 rows, starting one row above src */

    wmv2_mspel8_h_lowpass(horiz, src - stride, 8, stride, 11);
    /* &horiz[8] is the row aligned with src; the row before it feeds the top tap */
    wmv2_mspel8_v_lowpass(dst, &horiz[8], stride, 8, 8);
}
1330
332f9ac4 1331static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1332 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1333 int x;
1334 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1335
332f9ac4
MN
1336 for(x=0; x<8; x++){
1337 int d1, d2, ad1;
1338 int p0= src[x-2*stride];
1339 int p1= src[x-1*stride];
1340 int p2= src[x+0*stride];
1341 int p3= src[x+1*stride];
1342 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1343
1344 if (d<-2*strength) d1= 0;
1345 else if(d<- strength) d1=-2*strength - d;
1346 else if(d< strength) d1= d;
1347 else if(d< 2*strength) d1= 2*strength - d;
1348 else d1= 0;
115329f1 1349
332f9ac4
MN
1350 p1 += d1;
1351 p2 -= d1;
1352 if(p1&256) p1= ~(p1>>31);
1353 if(p2&256) p2= ~(p2>>31);
115329f1 1354
332f9ac4
MN
1355 src[x-1*stride] = p1;
1356 src[x+0*stride] = p2;
1357
c26abfa5 1358 ad1= FFABS(d1)>>1;
115329f1 1359
f66e4f5f 1360 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1361
332f9ac4
MN
1362 src[x-2*stride] = p0 - d2;
1363 src[x+ stride] = p3 + d2;
1364 }
73f51a4d 1365 }
332f9ac4
MN
1366}
1367
1368static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 1369 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
1370 int y;
1371 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 1372
332f9ac4
MN
1373 for(y=0; y<8; y++){
1374 int d1, d2, ad1;
1375 int p0= src[y*stride-2];
1376 int p1= src[y*stride-1];
1377 int p2= src[y*stride+0];
1378 int p3= src[y*stride+1];
1379 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1380
1381 if (d<-2*strength) d1= 0;
1382 else if(d<- strength) d1=-2*strength - d;
1383 else if(d< strength) d1= d;
1384 else if(d< 2*strength) d1= 2*strength - d;
1385 else d1= 0;
115329f1 1386
332f9ac4
MN
1387 p1 += d1;
1388 p2 -= d1;
1389 if(p1&256) p1= ~(p1>>31);
1390 if(p2&256) p2= ~(p2>>31);
115329f1 1391
332f9ac4
MN
1392 src[y*stride-1] = p1;
1393 src[y*stride+0] = p2;
1394
c26abfa5 1395 ad1= FFABS(d1)>>1;
115329f1 1396
f66e4f5f 1397 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 1398
332f9ac4
MN
1399 src[y*stride-2] = p0 - d2;
1400 src[y*stride+1] = p3 + d2;
1401 }
73f51a4d 1402 }
332f9ac4 1403}
1457ab52 1404
fdbbf2e0
MN
/**
 * In-place separable (1, 2, 1) smoothing of an 8x8 block.
 *
 * A vertical pass fills a temporary with sums scaled by 4 (top and bottom
 * rows are just 4 * sample), then a horizontal pass writes the block back:
 * the first and last columns only undo the vertical scale, interior columns
 * apply the horizontal (1, 2, 1) taps and divide by 16 with rounding.
 *
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8 * 8];
    int x, y;

    /* vertical pass: every source sample is read before anything is written */
    for (y = 0; y < 8; y++) {
        const uint8_t *in  = src  + y * stride;
        int           *out = vert + y * 8;

        if (y == 0 || y == 7) {
            for (x = 0; x < 8; x++)
                out[x] = 4 * in[x];
        } else {
            for (x = 0; x < 8; x++)
                out[x] = in[x - stride] + 2 * in[x] + in[x + stride];
        }
    }

    /* horizontal pass, written back over the source block */
    for (y = 0; y < 8; y++) {
        const int *in  = vert + y * 8;
        uint8_t   *out = src  + y * stride;

        out[0] = (in[0] + 2) >> 2;
        out[7] = (in[7] + 2) >> 2;
        for (x = 1; x < 7; x++)
            out[x] = (in[x - 1] + 2 * in[x] + in[x + 1] + 8) >> 4;
    }
}
1431
/**
 * Sum of absolute differences between two 16-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1459
/**
 * 16-wide sum of absolute differences against a horizontally interpolated
 * reference: each pix1 sample is compared with avg2() of the pix2 sample at
 * the same position and its right neighbour, so 17 pix2 samples are read per
 * row.
 *
 * @param v         unused
 * @param pix1      block compared as is
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1487
/**
 * 16-wide sum of absolute differences against a vertically interpolated
 * reference: each pix1 sample is compared with avg2() of the pix2 sample at
 * the same position and the one directly below it, so h + 1 rows of pix2 are
 * read.
 *
 * @param v         unused
 * @param pix1      block compared as is
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        const uint8_t *below = pix2 + line_size;

        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1517
/**
 * 16-wide sum of absolute differences against a diagonally interpolated
 * reference: each pix1 sample is compared with avg4() of the 2x2 group of
 * pix2 samples whose top-left corner is at the same position, so 17 samples
 * on each of h + 1 rows of pix2 are read.
 *
 * @param v         unused
 * @param pix1      block compared as is
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        const uint8_t *below = pix2 + line_size;

        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1547
/**
 * Sum of absolute differences between two 8-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1567
/**
 * 8-wide sum of absolute differences against a horizontally interpolated
 * reference: each pix1 sample is compared with avg2() of the pix2 sample at
 * the same position and its right neighbour, so 9 pix2 samples are read per
 * row.
 *
 * @param v         unused
 * @param pix1      block compared as is
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1587
/**
 * 8-wide sum of absolute differences against a vertically interpolated
 * reference: each pix1 sample is compared with avg2() of the pix2 sample at
 * the same position and the one directly below it, so h + 1 rows of pix2 are
 * read.
 *
 * @param v         unused
 * @param pix1      block compared as is
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        const uint8_t *below = pix2 + line_size;

        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1609
/**
 * 8-wide sum of absolute differences against a diagonally interpolated
 * reference: each pix1 sample is compared with avg4() of the 2x2 group of
 * pix2 samples whose top-left corner is at the same position, so 9 samples
 * on each of h + 1 rows of pix2 are read.
 *
 * @param v         unused
 * @param pix1      block compared as is
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        const uint8_t *below = pix2 + line_size;

        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1631
bf4e3bd2
MR
1632static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1633 MpegEncContext *c = v;
e6a2ac34
MN
1634 int score1=0;
1635 int score2=0;
1636 int x,y;
d4c5d2ad 1637
e6a2ac34
MN
1638 for(y=0; y<h; y++){
1639 for(x=0; x<16; x++){
1640 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1641 }
1642 if(y+1<h){
1643 for(x=0; x<15; x++){
c26abfa5 1644 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1645 - s1[x+1] + s1[x+1+stride])
c26abfa5 1646 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1647 - s2[x+1] + s2[x+1+stride]);
1648 }
1649 }
1650 s1+= stride;
1651 s2+= stride;
1652 }
d4c5d2ad 1653
c26abfa5
DB
1654 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1655 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1656}
1657
bf4e3bd2
MR
1658static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1659 MpegEncContext *c = v;
e6a2ac34
MN
1660 int score1=0;
1661 int score2=0;
1662 int x,y;
115329f1 1663
e6a2ac34
MN
1664 for(y=0; y<h; y++){
1665 for(x=0; x<8; x++){
1666 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1667 }
1668 if(y+1<h){
1669 for(x=0; x<7; x++){
c26abfa5 1670 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 1671 - s1[x+1] + s1[x+1+stride])
c26abfa5 1672 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
1673 - s2[x+1] + s2[x+1+stride]);
1674 }
1675 }
1676 s1+= stride;
1677 s2+= stride;
1678 }
115329f1 1679
c26abfa5
DB
1680 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1681 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
1682}
1683
364a1797
MN
1684static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1685 int i;
1686 unsigned int sum=0;
1687
1688 for(i=0; i<8*8; i++){
1689 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1690 int w= weight[i];
1691 b>>= RECON_SHIFT;
1692 assert(-512<b && b<512);
1693
1694 sum += (w*b)*(w*b)>>4;
1695 }
1696 return sum>>2;
1697}
1698
1699static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1700 int i;
1701
1702 for(i=0; i<8*8; i++){
1703 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115329f1 1704 }
364a1797
MN
1705}
1706
a9badb51 1707/**
58c42af7 1708 * Permute an 8x8 block.
2a5700de 1709 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
1710 * @param permutation the permutation vector
1711 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
115329f1 1712 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2a5700de 1713 * (inverse) permutated to scantable order!
a9badb51 1714 */
0c1a9eda 1715void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 1716{
7801d21d 1717 int i;
477ab036 1718 DCTELEM temp[64];
115329f1 1719
7801d21d 1720 if(last<=0) return;
90b5b51e 1721 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
d962f6fd 1722
7801d21d
MN
1723 for(i=0; i<=last; i++){
1724 const int j= scantable[i];
1725 temp[j]= block[j];
1726 block[j]=0;
1727 }
115329f1 1728
7801d21d
MN
1729 for(i=0; i<=last; i++){
1730 const int j= scantable[i];
1731 const int perm_j= permutation[j];
1732 block[perm_j]= temp[j];
1733 }
d962f6fd 1734}
e0eac44e 1735
622348f9
MN
/**
 * Comparison function that ignores all of its arguments and always
 * reports a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
1739
1740void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1741 int i;
115329f1 1742
3899eb2f 1743 memset(cmp, 0, sizeof(void*)*6);
115329f1 1744
3899eb2f 1745 for(i=0; i<6; i++){
622348f9
MN
1746 switch(type&0xFF){
1747 case FF_CMP_SAD:
1748 cmp[i]= c->sad[i];
1749 break;
1750 case FF_CMP_SATD:
1751 cmp[i]= c->hadamard8_diff[i];
1752 break;
1753 case FF_CMP_SSE:
1754 cmp[i]= c->sse[i];
1755 break;
1756 case FF_CMP_DCT:
1757 cmp[i]= c->dct_sad[i];
1758 break;
27c61ac5
MN
1759 case FF_CMP_DCT264:
1760 cmp[i]= c->dct264_sad[i];
1761 break;
0fd6aea1
MN
1762 case FF_CMP_DCTMAX:
1763 cmp[i]= c->dct_max[i];
1764 break;
622348f9
MN
1765 case FF_CMP_PSNR:
1766 cmp[i]= c->quant_psnr[i];
1767 break;
1768 case FF_CMP_BIT:
1769 cmp[i]= c->bit[i];
1770 break;
1771 case FF_CMP_RD:
1772 cmp[i]= c->rd[i];
1773 break;
1774 case FF_CMP_VSAD:
1775 cmp[i]= c->vsad[i];
1776 break;
1777 case FF_CMP_VSSE:
1778 cmp[i]= c->vsse[i];
1779 break;
1780 case FF_CMP_ZERO:
1781 cmp[i]= zero_cmp;
1782 break;
e6a2ac34
MN
1783 case FF_CMP_NSSE:
1784 cmp[i]= c->nsse[i];
1785 break;
622348f9
MN
1786 default:
1787 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1788 }
1789 }
1790}
1791
11f18faf 1792static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
469bd7b1
LM
1793 long i;
1794 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1795 long a = *(long*)(src+i);
1796 long b = *(long*)(dst+i);
1797 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
11f18faf
MN
1798 }
1799 for(; i<w; i++)
1800 dst[i+0] += src[i+0];
1801}
1802
1803static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
469bd7b1 1804 long i;
b250f9c6 1805#if !HAVE_FAST_UNALIGNED
469bd7b1 1806 if((long)src2 & (sizeof(long)-1)){
31304587
LM
1807 for(i=0; i+7<w; i+=8){
1808 dst[i+0] = src1[i+0]-src2[i+0];
1809 dst[i+1] = src1[i+1]-src2[i+1];
1810 dst[i+2] = src1[i+2]-src2[i+2];
1811 dst[i+3] = src1[i+3]-src2[i+3];
1812 dst[i+4] = src1[i+4]-src2[i+4];
1813 dst[i+5] = src1[i+5]-src2[i+5];
1814 dst[i+6] = src1[i+6]-src2[i+6];
1815 dst[i+7] = src1[i+7]-src2[i+7];
1816 }
469bd7b1
LM
1817 }else
1818#endif
1819 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1820 long a = *(long*)(src1+i);
1821 long b = *(long*)(src2+i);
1822 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1823 }
11f18faf
MN
1824 for(; i<w; i++)
1825 dst[i+0] = src1[i+0]-src2[i+0];
1826}
1827
/**
 * Reconstruct a row from residuals: each output byte is mid_pred() of the
 * left value, the value in src1 and (left + src1 - top-left), plus the
 * corresponding byte of diff (all arithmetic wraps to 8 bits).
 *
 * @param dst      reconstructed row
 * @param src1     reference row read alongside dst (supplies the "top" values)
 * @param diff     residual bytes
 * @param w        number of bytes
 * @param left     in: initial left value; out: last reconstructed value
 * @param left_top in: initial top-left value; out: last value read from src1
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top)
{
    uint8_t cur_left     = *left;
    uint8_t cur_top_left = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top = src1[x];

        cur_left     = mid_pred(cur_left, top, (cur_left + top - cur_top_left) & 0xFF) + diff[x];
        cur_top_left = top;
        dst[x]       = cur_left;
    }

    *left     = cur_left;
    *left_top = cur_top_left;
}
1844
/**
 * Compute residuals for a row: each output byte is the src2 byte minus
 * mid_pred() of the left value, the value in src1 and
 * (left + src1 - top-left).
 *
 * @param dst      residual output
 * @param src1     reference row (supplies the "top" values)
 * @param src2     row being predicted (supplies the "left" values)
 * @param w        number of bytes
 * @param left     in: initial left value; out: last byte read from src2
 * @param left_top in: initial top-left value; out: last byte read from src1
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
{
    uint8_t cur_left     = *left;
    uint8_t cur_top_left = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top       = src1[x];
        const int     predicted = mid_pred(cur_left, top, (cur_left + top - cur_top_left) & 0xFF);

        cur_top_left = top;
        cur_left     = src2[x];
        dst[x]       = cur_left - predicted;
    }

    *left     = cur_left;
    *left_top = cur_top_left;
}
1862
/**
 * Running-sum reconstruction: dst[i] receives the low 8 bits of
 * acc + src[0] + ... + src[i].
 *
 * @param dst output bytes
 * @param src residual bytes
 * @param w   number of bytes
 * @param acc initial accumulator value
 * @return the final accumulator (not truncated to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc)
{
    int pos;

    for (pos = 0; pos < w; pos++) {
        acc     += src[pos];
        dst[pos] = acc;
    }

    return acc;
}
1881
/* Byte offsets of the four channels inside one 32-bit pixel; the order is
 * reversed when HAVE_BIGENDIAN is set. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-channel running-sum reconstruction of 4-byte pixels: every output
 * channel byte is the low 8 bits of that channel's accumulator after adding
 * the corresponding src byte.
 *
 * @param dst   output pixels (4 bytes each)
 * @param src   residual pixels (4 bytes each)
 * @param w     number of pixels
 * @param red   in: initial accumulator; out: final accumulator (untruncated)
 * @param green same, for the green channel
 * @param blue  same, for the blue channel
 * @param alpha same, for the alpha channel
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
{
    int acc_r = *red;
    int acc_g = *green;
    int acc_b = *blue;
    int acc_a = *alpha;
    int px;

    for (px = 0; px < w; px++) {
        const uint8_t *in  = src + 4 * px;
        uint8_t       *out = dst + 4 * px;

        acc_b += in[B];
        acc_g += in[G];
        acc_r += in[R];
        acc_a += in[A];

        out[B] = acc_b;
        out[G] = acc_g;
        out[R] = acc_r;
        out[A] = acc_a;
    }

    *red   = acc_r;
    *green = acc_g;
    *blue  = acc_b;
    *alpha = acc_a;
}
#undef B
#undef G
#undef R
#undef A
73c6f598 1922
1457ab52
MN
/* BUTTERFLY2: store the sum of the two inputs in o1 and their difference in
 * o2. Expands to two separate statements, so it must not be used as the sole
 * body of an unbraced if/else/loop. */
1923#define BUTTERFLY2(o1,o2,i1,i2) \
1924o1= (i1)+(i2);\
1925o2= (i1)-(i2);
1926
/* BUTTERFLY1: in-place variant; x becomes x+y and y becomes x-y. Uses block
 * locals named a and b, so the arguments must not be expressions that refer
 * to identifiers with those names. */
1927#define BUTTERFLY1(x,y) \
1928{\
1929 int a,b;\
1930 a= x;\
1931 b= y;\
1932 x= a+b;\
1933 y= a-b;\
1934}
1935
/* BUTTERFLYA: |x+y| + |x-y| as an expression; both arguments are evaluated
 * more than once. */
c26abfa5 1936#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1457ab52 1937
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * the difference src - dst.
 *
 * @param s      unused context pointer
 * @param dst    first block
 * @param src    second block
 * @param stride distance in bytes between consecutive rows
 * @param h      must be 8 (asserted)
 * @return accumulated absolute transform coefficients
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h)
{
    int temp[64];
    int total = 0;
    int n;

    assert(h==8);

    /* horizontal pass: three butterfly stages on each row of differences */
    for (n = 0; n < 8; n++) {
        const uint8_t *sp = src + stride * n;
        const uint8_t *dp = dst + stride * n;
        int *t = temp + 8 * n;

        //FIXME try pointer walks
        BUTTERFLY2(t[0], t[1], sp[0] - dp[0], sp[1] - dp[1]);
        BUTTERFLY2(t[2], t[3], sp[2] - dp[2], sp[3] - dp[3]);
        BUTTERFLY2(t[4], t[5], sp[4] - dp[4], sp[5] - dp[5]);
        BUTTERFLY2(t[6], t[7], sp[6] - dp[6], sp[7] - dp[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* vertical pass: two in-place stages, the third is folded into the
     * absolute-value accumulation */
    for (n = 0; n < 8; n++) {
        int *t = temp + n;

        BUTTERFLY1(t[8 * 0], t[8 * 1]);
        BUTTERFLY1(t[8 * 2], t[8 * 3]);
        BUTTERFLY1(t[8 * 4], t[8 * 5]);
        BUTTERFLY1(t[8 * 6], t[8 * 7]);

        BUTTERFLY1(t[8 * 0], t[8 * 2]);
        BUTTERFLY1(t[8 * 1], t[8 * 3]);
        BUTTERFLY1(t[8 * 4], t[8 * 6]);
        BUTTERFLY1(t[8 * 5], t[8 * 7]);

        total += BUTTERFLYA(t[8 * 0], t[8 * 4])
               + BUTTERFLYA(t[8 * 1], t[8 * 5])
               + BUTTERFLYA(t[8 * 2], t[8 * 6])
               + BUTTERFLYA(t[8 * 3], t[8 * 7]);
    }
    return total;
}
1982
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * a single block, with the |temp[0] + temp[32]| term (marked "-mean" in the
 * original code) subtracted from the result.
 *
 * @param s      unused context pointer
 * @param src    block to analyse
 * @param dummy  unused
 * @param stride distance in bytes between consecutive rows
 * @param h      must be 8 (asserted)
 * @return accumulated absolute transform coefficients minus the mean term
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h)
{
    int temp[64];
    int total = 0;
    int n;

    assert(h==8);

    /* horizontal pass: three butterfly stages on each row */
    for (n = 0; n < 8; n++) {
        const uint8_t *sp = src + stride * n;
        int *t = temp + 8 * n;

        //FIXME try pointer walks
        BUTTERFLY2(t[0], t[1], sp[0], sp[1]);
        BUTTERFLY2(t[2], t[3], sp[2], sp[3]);
        BUTTERFLY2(t[4], t[5], sp[4], sp[5]);
        BUTTERFLY2(t[6], t[7], sp[6], sp[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* vertical pass: two in-place stages, the third is folded into the
     * absolute-value accumulation */
    for (n = 0; n < 8; n++) {
        int *t = temp + n;

        BUTTERFLY1(t[8 * 0], t[8 * 1]);
        BUTTERFLY1(t[8 * 2], t[8 * 3]);
        BUTTERFLY1(t[8 * 4], t[8 * 5]);
        BUTTERFLY1(t[8 * 6], t[8 * 7]);

        BUTTERFLY1(t[8 * 0], t[8 * 2]);
        BUTTERFLY1(t[8 * 1], t[8 * 3]);
        BUTTERFLY1(t[8 * 4], t[8 * 6]);
        BUTTERFLY1(t[8 * 5], t[8 * 7]);

        total += BUTTERFLYA(t[8 * 0], t[8 * 4])
               + BUTTERFLYA(t[8 * 1], t[8 * 5])
               + BUTTERFLYA(t[8 * 2], t[8 * 6])
               + BUTTERFLYA(t[8 * 3], t[8 * 7]);
    }

    total -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean

    return total;
}
2030
bb198e19 2031static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2032 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2033 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
115329f1 2034
bb198e19 2035 assert(h==8);
1457ab52
MN
2036
2037 s->dsp.diff_pixels(temp, src1, src2, stride);
b0368839 2038 s->dsp.fdct(temp);
1edbfe19 2039 return s->dsp.sum_abs_dctelem(temp);
1457ab52
MN
2040}
2041
#if CONFIG_GPL
/* One 8-point integer transform built from additions and shifts only.
 * The caller must define SRC(x) to read input x and DST(x,v) to consume
 * output x with value v. All inputs are read before any output is produced. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * Comparison function: applies DCT8_1D along the rows and then along the
 * columns of the pixel difference of two 8x8 blocks, and returns the sum of
 * the absolute values of the second-pass outputs.
 *
 * @param c      MpegEncContext whose dsp.diff_pixels is used
 * @param src1   first block
 * @param src2   second block
 * @param stride distance in bytes between consecutive rows
 * @param h      unused
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
{
    MpegEncContext *const enc = (MpegEncContext *)c;
    DCTELEM coef[8][8];
    int total = 0;
    int k;

    enc->dsp.diff_pixels(coef[0], src1, src2, stride);

    /* first pass: transform each row in place */
#define SRC(x) coef[k][x]
#define DST(x,v) coef[k][x]= v
    for (k = 0; k < 8; k++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    /* second pass: transform each column, only accumulating magnitudes */
#define SRC(x) coef[x][k]
#define DST(x,v) total += FFABS(v)
    for (k = 0; k < 8; k++) {
        DCT8_1D
    }
#undef SRC
#undef DST
    return total;
}
#endif
2094
0fd6aea1
MN
2095static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2096 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2097 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
0fd6aea1 2098 int sum=0, i;
115329f1 2099
0fd6aea1
MN
2100 assert(h==8);
2101
2102 s->dsp.diff_pixels(temp, src1, src2, stride);
2103 s->dsp.fdct(temp);
2104
2105 for(i=0; i<64; i++)
c26abfa5 2106 sum= FFMAX(sum, FFABS(temp[i]));
115329f1 2107
0fd6aea1
MN
2108 return sum;
2109}
2110
bb198e19 2111static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1457ab52 2112 MpegEncContext * const s= (MpegEncContext *)c;
40d11227 2113 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2480c390 2114 DCTELEM * const bak = temp+64;
1457ab52
MN
2115 int sum=0, i;
2116
bb198e19 2117 assert(h==8);
1457ab52 2118 s->mb_intra=0;
115329f1 2119
1457ab52 2120 s->dsp.diff_pixels(temp, src1, src2, stride);
115329f1 2121
1457ab52 2122 memcpy(bak, temp, 64*sizeof(DCTELEM));
115329f1 2123
67725183 2124 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
d50635cd 2125 s->dct_unquantize_inter(s, temp, 0, s->qscale);
e7a972e1 2126 ff_simple_idct_8(temp); //FIXME
115329f1 2127
1457ab52
MN
2128 for(i=0; i<64; i++)
2129 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
115329f1 2130
1457ab52
MN
2131 return sum;
2132}
2133
bb198e19 2134static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2135 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2136 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227
MR
2137 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2138 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2139 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
e6dba5df 2140 int i, last, run, bits, level, distortion, start_i;
3a87ac94
MN
2141 const int esc_length= s->ac_esc_length;
2142 uint8_t * length;
2143 uint8_t * last_length;
115329f1 2144
bb198e19
MN
2145 assert(h==8);
2146
90d43b52
MR
2147 copy_block8(lsrc1, src1, 8, stride, 8);
2148 copy_block8(lsrc2, src2, 8, stride, 8);
3a87ac94 2149
90d43b52 2150 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
67725183
MN
2151
2152 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2153
2154 bits=0;
115329f1 2155
3a87ac94 2156 if (s->mb_intra) {
115329f1 2157 start_i = 1;
3a87ac94
MN
2158 length = s->intra_ac_vlc_length;
2159 last_length= s->intra_ac_vlc_last_length;
67725183 2160 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2161 } else {
2162 start_i = 0;
2163 length = s->inter_ac_vlc_length;
2164 last_length= s->inter_ac_vlc_last_length;
2165 }
115329f1 2166
67725183 2167 if(last>=start_i){
3a87ac94
MN
2168 run=0;
2169 for(i=start_i; i<last; i++){
2170 int j= scantable[i];
2171 level= temp[j];
115329f1 2172
3a87ac94
MN
2173 if(level){
2174 level+=64;
2175 if((level&(~127)) == 0){
2176 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2177 }else
2178 bits+= esc_length;
2179 run=0;
2180 }else
2181 run++;
2182 }
2183 i= scantable[last];
115329f1 2184
3a87ac94 2185 level= temp[i] + 64;
1d0eab1d
MN
2186
2187 assert(level - 64);
115329f1 2188
3a87ac94
MN
2189 if((level&(~127)) == 0){
2190 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2191 }else
2192 bits+= esc_length;
115329f1 2193
67725183
MN
2194 }
2195
2196 if(last>=0){
d50635cd
MN
2197 if(s->mb_intra)
2198 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2199 else
2200 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3a87ac94 2201 }
115329f1 2202
90d43b52 2203 s->dsp.idct_add(lsrc2, 8, temp);
115329f1 2204
90d43b52 2205 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
3a87ac94 2206
e6dba5df 2207 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3a87ac94
MN
2208}
2209
bb198e19 2210static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3a87ac94 2211 MpegEncContext * const s= (MpegEncContext *)c;
0c1a9eda 2212 const uint8_t *scantable= s->intra_scantable.permutated;
40d11227 2213 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3a87ac94
MN
2214 int i, last, run, bits, level, start_i;
2215 const int esc_length= s->ac_esc_length;
2216 uint8_t * length;
2217 uint8_t * last_length;
bb198e19
MN
2218
2219 assert(h==8);
115329f1 2220
67725183 2221 s->dsp.diff_pixels(temp, src1, src2, stride);
3a87ac94 2222
67725183
MN
2223 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2224
2225 bits=0;
115329f1 2226
3a87ac94 2227 if (s->mb_intra) {
115329f1 2228 start_i = 1;
3a87ac94
MN
2229 length = s->intra_ac_vlc_length;
2230 last_length= s->intra_ac_vlc_last_length;
67725183 2231 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3a87ac94
MN
2232 } else {
2233 start_i = 0;
2234 length = s->inter_ac_vlc_length;
2235 last_length= s->inter_ac_vlc_last_length;
2236 }
115329f1 2237
67725183 2238 if(last>=start_i){
3a87ac94
MN
2239 run=0;
2240 for(i=start_i; i<last; i++){
2241 int j= scantable[i];
2242 level= temp[j];
115329f1 2243
3a87ac94
MN
2244 if(level){
2245 level+=64;
2246 if((level&(~127)) == 0){
2247 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2248 }else
2249 bits+= esc_length;
2250 run=0;
2251 }else
2252 run++;
2253 }
2254 i= scantable[last];
115329f1 2255
67725183 2256 level= temp[i] + 64;
115329f1 2257
67725183 2258 assert(level - 64);
115329f1 2259
3a87ac94
MN
2260 if((level&(~127)) == 0){
2261 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2262 }else
2263 bits+= esc_length;
2264 }
2265
2266 return bits;
2267}
2268
7fb7f636
RS
/* Generates vsad_intra<size>_c(): the sum of absolute differences between
 * every pixel and the pixel directly below it, over a block <size> pixels
 * wide and h rows high (h-1 row pairs). Columns are processed four at a
 * time. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0; \
    int row, col; \
 \
    for (row = 1; row < h; row++) { \
        const uint8_t *cur = s + (row - 1) * stride; \
 \
        for (col = 0; col < size; col += 4) { \
            total += FFABS(cur[col    ] - cur[col     + stride]) \
                   + FFABS(cur[col + 1] - cur[col + 1 + stride]) \
                   + FFABS(cur[col + 2] - cur[col + 2 + stride]) \
                   + FFABS(cur[col + 3] - cur[col + 3 + stride]); \
        } \
    } \
 \
    return total; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
622348f9
MN
2286
/**
 * Sum over a 16-pixel-wide block of the absolute vertical gradient of the
 * difference s1 - s2, i.e. |(s1 - s2) at row y minus (s1 - s2) at row y+1|.
 *
 * @param c      unused context pointer
 * @param s1     first block
 * @param s2     second block
 * @param stride distance in bytes between consecutive rows
 * @param h      number of rows (h-1 row pairs are evaluated)
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        const uint8_t *p1 = s1 + (row - 1) * stride;
        const uint8_t *p2 = s2 + (row - 1) * stride;

        for (col = 0; col < 16; col++) {
            total += FFABS(p1[col] - p2[col] - p1[col + stride] + p2[col + stride]);
        }
    }

    return total;
}
2301
/* Square of an expression; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))
/* Generates vsse_intra<size>_c(): the sum of squared differences between
 * every pixel and the pixel directly below it, over a block <size> pixels
 * wide and h rows high (h-1 row pairs). Columns are processed four at a
 * time. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0; \
    int row, col; \
 \
    for (row = 1; row < h; row++) { \
        const uint8_t *cur = s + (row - 1) * stride; \
 \
        for (col = 0; col < size; col += 4) { \
            total += SQ(cur[col    ] - cur[col     + stride]) \
                   + SQ(cur[col + 1] - cur[col + 1 + stride]) \
                   + SQ(cur[col + 2] - cur[col + 2 + stride]) \
                   + SQ(cur[col + 3] - cur[col + 3 + stride]); \
        } \
    } \
 \
    return total; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
622348f9
MN
2320
/**
 * Sum over a 16-pixel-wide block of the squared vertical gradient of the
 * difference s1 - s2, i.e. ((s1 - s2) at row y minus (s1 - s2) at row y+1)^2.
 *
 * @param c      unused context pointer
 * @param s1     first block
 * @param s2     second block
 * @param stride distance in bytes between consecutive rows
 * @param h      number of rows (h-1 row pairs are evaluated)
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        const uint8_t *p1 = s1 + (row - 1) * stride;
        const uint8_t *p2 = s2 + (row - 1) * stride;

        for (col = 0; col < 16; col++) {
            total += SQ(p1[col] - p2[col] - p1[col + stride] + p2[col + stride]);
        }
    }

    return total;
}
2335
a00177a9
MR
/**
 * Sum of squared differences between an int8_t array and an int16_t array.
 *
 * @param pix1 first array
 * @param pix2 second array
 * @param size number of elements
 * @return accumulated squared difference
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int k;

    for (k = 0; k < size; k++) {
        const int delta = pix1[k] - pix2[k];

        total += delta * delta;
    }
    return total;
}
2344
9fbd14ac
DB
/* Instantiate the *16_c companions of the 8x8 comparison functions above.
 * NOTE(review): WRAPPER8_16_SQ is defined outside this part of the file;
 * presumably it builds a 16-wide function from calls to the 8x8 one -
 * confirm against its definition. The dct264 variant only exists when
 * CONFIG_GPL is enabled, matching the guard around dct264_sad8x8_c. */
2345WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2346WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2347WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
b250f9c6 2348#if CONFIG_GPL
9fbd14ac 2349WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
60900991 2350#endif
9fbd14ac
DB
2351WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2352WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2353WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2354WRAPPER8_16_SQ(bit8x8_c, bit16_c)
1457ab52 2355
eb4825b5
LM
/**
 * Multiply src0 element-wise by src1 read back to front:
 * dst[i] = src0[i] * src1[len - 1 - i].
 *
 * @param dst  output vector
 * @param src0 first input, read forwards
 * @param src1 second input, read backwards
 * @param len  number of elements
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
{
    int fwd;

    for (fwd = 0; fwd < len; fwd++) {
        const int rev = len - 1 - fwd;

        dst[fwd] = src0[fwd] * src1[rev];
    }
}
2362
/**
 * Element-wise multiply-add: dst[i] = src0[i] * src1[i] + src2[i].
 *
 * @param dst  output vector
 * @param src0 first factor
 * @param src1 second factor
 * @param src2 addend
 * @param len  number of elements
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
2368
53b57211
MR
/**
 * In-place butterfly on two vectors: v1[i] becomes v1[i] + v2[i] and
 * v2[i] becomes the original v1[i] - v2[i].
 *
 * @param v1  first vector, receives the sums
 * @param v2  second vector, receives the differences
 * @param len number of elements
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float x = v1[k];
        const float y = v2[k];

        v1[k] = x + y;
        v2[k] = x - y;
    }
}
2379
/**
 * Dot product of two float vectors, accumulated in a float in index order.
 *
 * @param v1  first vector
 * @param v2  second vector
 * @param len number of elements
 * @return sum of v1[i] * v2[i]
 */
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}
2390
0a68cd87
VS
/**
 * Clip one value using unsigned integer comparisons only.
 *
 * @param a        value to clip
 * @param mini     returned when a compares greater than it
 * @param maxi     returned when a, with its top bit flipped, compares
 *                 greater than maxisign
 * @param maxisign maxi with its top bit flipped (precomputed by the caller)
 * @return mini, maxi or a unchanged
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    const uint32_t flipped = a ^ (1U<<31);

    if (a > mini)
        return mini;
    if (flipped > maxisign)
        return maxi;
    return a;
}
2399
/**
 * Clip a float vector by treating every value as a raw 32-bit pattern and
 * running it through clipf_c_one(). Called by vector_clipf_c() when min is
 * negative and max is positive.
 *
 * Elements are processed in groups of 8, so a len that is not a multiple of
 * 8 still touches the whole last group.
 *
 * @param dst output vector
 * @param src input vector
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
{
    const uint32_t lo_bits  = *(uint32_t*)min;
    const uint32_t hi_bits  = *(uint32_t*)max;
    const uint32_t hi_flip  = hi_bits ^ (1U<<31);
    uint32_t *out           = (uint32_t*)dst;
    const uint32_t *in      = (const uint32_t*)src;
    int base, k;

    for (base = 0; base < len; base += 8) {
        for (k = 0; k < 8; k++)
            out[base + k] = clipf_c_one(in[base + k], lo_bits, hi_bits, hi_flip);
    }
}
/**
 * Clip every element of a float vector to [min, max].
 *
 * When min is negative and max is positive the work is delegated to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied per
 * element. Elements are processed in groups of 8, so a len that is not a
 * multiple of 8 still touches the whole last group.
 *
 * @param dst output vector
 * @param src input vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8) {
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
    }
}
2435
/**
 * Dot product of two int16_t vectors, accumulated in an int.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements
 * @return sum of v1[i] * v2[i]
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int acc = 0;
    int k;

    for (k = 0; order != 0; k++, order--)
        acc += v1[k] * v2[k];

    return acc;
}
2445
/**
 * Compute the dot product of v1 and v2 while updating v1 in place with
 * v1[i] += mul * v3[i]. Each product uses the value of v1[i] from before
 * its update.
 *
 * @param v1    first vector, modified in place
 * @param v2    second vector
 * @param v3    vector scaled by mul and added onto v1
 * @param order number of elements
 * @param mul   scale factor for v3
 * @return sum of (original v1[i]) * v2[i]
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;
    int k;

    for (k = 0; order != 0; k++, order--) {
        acc   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
    }
    return acc;
}
2455
e6e98234
JR
/**
 * Apply a symmetric window to int16_t samples. Only the first len/2 window
 * coefficients are read; coefficient i scales both sample i and its mirror
 * sample len-1-i. Products are rounded by adding 1<<14 and shifted right
 * by 15.
 *
 * @param output windowed samples
 * @param input  input samples
 * @param window first half of the window
 * @param len    number of samples
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int k;

    for (k = 0; k < half; k++) {
        const unsigned int mirror = len - k - 1;
        const int16_t coef = window[k];

        output[k]      = (MUL16(input[k],      coef) + (1 << 14)) >> 15;
        output[mirror] = (MUL16(input[mirror], coef) + (1 << 14)) >> 15;
    }
}
2468
6054cd25
JR
/**
 * Clip every element of an int32_t vector to [min, max] with av_clip().
 *
 * Elements are handled in groups of 8 and the first group is processed
 * before len is checked, so len is expected to be a non-zero multiple of 8.
 *
 * @param dst output vector
 * @param src input vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;

        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);

        dst += 8;
        src += 8;
        len -= 8;
    } while (len > 0);
}
2484
b0368839
MN
2485static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2486{
c8e1b2fb 2487 ff_j_rev_dct (block);
dbc9f84e 2488 put_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2489}
2490static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2491{
c8e1b2fb 2492 ff_j_rev_dct (block);
dbc9f84e 2493 add_pixels_clamped_c(block, dest, line_size);
b0368839
MN
2494}
2495
59cf08ce 2496/* init static data */
9cf0841e 2497av_cold void ff_dsputil_static_init(void)
e0eac44e 2498{
d2975f8d 2499 int i;
e0eac44e 2500
55fde95e 2501 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
59cf08ce 2502 for(i=0;i<MAX_NEG_CROP;i++) {
55fde95e
MR
2503 ff_cropTbl[i] = 0;
2504 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
59cf08ce 2505 }
115329f1 2506
59cf08ce 2507 for(i=0;i<512;i++) {
1d503957 2508 ff_squareTbl[i] = (i - 256) * (i - 256);
59cf08ce 2509 }
115329f1 2510
873c89e2 2511 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
59cf08ce 2512}
92ddb692 2513
6dc7d5da
MN
2514int ff_check_alignment(void){
2515 static int did_fail=0;
29b42c66 2516 LOCAL_ALIGNED_16(int, aligned, [4]);
6dc7d5da 2517
29b42c66 2518 if((intptr_t)aligned & 15){
6dc7d5da 2519 if(!did_fail){
b250f9c6 2520#if HAVE_MMX || HAVE_ALTIVEC
6dc7d5da 2521 av_log(NULL, AV_LOG_ERROR,
c1173617
MR
2522 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2523 "and may be very slow or crash. This is not a bug in libavcodec,\n"
5e4c7ca2 2524 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
6001dad6 2525 "Do not report crashes to Libav developers.\n");
6dc7d5da
MN
2526#endif
2527 did_fail=1;
2528 }
2529 return -1;
2530 }
2531 return 0;
2532}
92ddb692 2533
9cf0841e 2534av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
59cf08ce 2535{
6dc7d5da
MN
2536 ff_check_alignment();
2537
b250f9c6 2538#if CONFIG_ENCODERS
0a72533e
MR
2539 if (avctx->bits_per_raw_sample == 10) {
2540 c->fdct = ff_jpeg_fdct_islow_10;
2541 c->fdct248 = ff_fdct248_islow_10;
2542 } else {
2543 if(avctx->dct_algo==FF_DCT_FASTINT) {
3e2efacd
MS
2544 c->fdct = ff_fdct_ifast;
2545 c->fdct248 = ff_fdct_ifast248;
0a72533e
MR
2546 }
2547 else if(avctx->dct_algo==FF_DCT_FAAN) {
2548 c->fdct = ff_faandct;
2549 c->fdct248 = ff_faandct248;
2550 }
2551 else {
2552 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2553 c->fdct248 = ff_fdct248_islow_8;
2554 }
10acc479 2555 }
b0368839
MN
2556#endif //CONFIG_ENCODERS
2557
2bcbd984
MR
2558 if (avctx->bits_per_raw_sample == 10) {
2559 c->idct_put = ff_simple_idct_put_10;
2560 c->idct_add = ff_simple_idct_add_10;
2561 c->idct = ff_simple_idct_10;
2562 c->idct_permutation_type = FF_NO_IDCT_PERM;
2563 } else {
178fcca8
MN
2564 if(avctx->idct_algo==FF_IDCT_INT){
2565 c->idct_put= ff_jref_idct_put;
2566 c->idct_add= ff_jref_idct_add;
c8e1b2fb 2567 c->idct = ff_j_rev_dct;
178fcca8 2568 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
6f08c541
MN
2569 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2570 c->idct_put= ff_faanidct_put;
2571 c->idct_add= ff_faanidct_add;
2572 c->idct = ff_faanidct;
2573 c->idct_permutation_type= FF_NO_IDCT_PERM;
178fcca8 2574 }else{ //accurate/default
e7a972e1
MR
2575 c->idct_put = ff_simple_idct_put_8;
2576 c->idct_add = ff_simple_idct_add_8;
2577 c->idct = ff_simple_idct_8;
178fcca8
MN
2578 c->idct_permutation_type= FF_NO_IDCT_PERM;
2579 }
b0368839
MN
2580 }
2581
eb4b3dd3 2582 c->diff_pixels = diff_pixels_c;
dbc9f84e
MR
2583 c->put_pixels_clamped = put_pixels_clamped_c;
2584 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2585 c->add_pixels_clamped = add_pixels_clamped_c;
1edbfe19 2586 c->sum_abs_dctelem = sum_abs_dctelem_c;
eb4b3dd3 2587 c->gmc1 = gmc1_c;
703c8195 2588 c->gmc = ff_gmc_c;
eb4b3dd3
ZK
2589 c->pix_sum = pix_sum_c;
2590 c->pix_norm1 = pix_norm1_c;
2591
342c7dfd
KS
2592 c->fill_block_tab[0] = fill_block16_c;
2593 c->fill_block_tab[1] = fill_block8_c;
342c7dfd 2594
45553457 2595 /* TODO [0] 16 [1] 8 */
bb198e19
MN
2596 c->pix_abs[0][0] = pix_abs16_c;
2597 c->pix_abs[0][1] = pix_abs16_x2_c;
2598 c->pix_abs[0][2] = pix_abs16_y2_c;
2599 c->pix_abs[0][3] = pix_abs16_xy2_c;
2600 c->pix_abs[1][0] = pix_abs8_c;
2601 c->pix_abs[1][1] = pix_abs8_x2_c;
2602 c->pix_abs[1][2] = pix_abs8_y2_c;
2603 c->pix_abs[1][3] = pix_abs8_xy2_c;
eb4b3dd3 2604
669ac79c
MN
2605 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2606 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2607 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2608 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2609 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2610 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2611 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2612 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2613 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2614
da3b9756
MM
2615 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2616 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2617 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2618 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2619 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2620 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2621 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2622 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2623 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2624
45553457
ZK
/* Fill the 16 entries of c->PFX_pixels_tab[IDX] with the C functions named
 * PFX<NUM>_mc<x><y>_c, where the table index is x + 4 * y (x, y in 0..3).
 * The expansion has no trailing semicolon; each use supplies its own. */
2625#define dspfunc(PFX, IDX, NUM) \
2626 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2627 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2628 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2629 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2630 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2631 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2632 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2633 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2634 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2635 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2636 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2637 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2638 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2639 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2640 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2641 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2642
2643 dspfunc(put_qpel, 0, 16);
2644 dspfunc(put_no_rnd_qpel, 0, 16);
2645
2646 dspfunc(avg_qpel, 0, 16);
2647 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2648
2649 dspfunc(put_qpel, 1, 8);
2650 dspfunc(put_no_rnd_qpel, 1, 8);
2651
2652 dspfunc(avg_qpel, 1, 8);
2653 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
0da71265 2654
45553457 2655#undef dspfunc
5a6a9e78 2656
3d1b1caa 2657 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
1457ab52
MN
2658 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2659 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2660 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2661 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2662 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2663 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2664 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
115329f1 2665
bb198e19
MN
/* Point c->name[0] at name16_c and c->name[1] at name8x8_c.
 * The expansion already ends in a semicolon, which is why the uses of this
 * macro are written without one. */
2666#define SET_CMP_FUNC(name) \
2667 c->name[0]= name ## 16_c;\
2668 c->name[1]= name ## 8x8_c;
115329f1 2669
bb198e19 2670 SET_CMP_FUNC(hadamard8_diff)
622348f9 2671 c->hadamard8_diff[4]= hadamard8_intra16_c;
7fb7f636 2672 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
bb198e19 2673 SET_CMP_FUNC(dct_sad)
0fd6aea1 2674 SET_CMP_FUNC(dct_max)
b250f9c6 2675#if CONFIG_GPL
27c61ac5 2676 SET_CMP_FUNC(dct264_sad)
60900991 2677#endif
bb198e19
MN
2678 c->sad[0]= pix_abs16_c;
2679 c->sad[1]= pix_abs8_c;
2680 c->sse[0]= sse16_c;
2681 c->sse[1]= sse8_c;
26efc54e 2682 c->sse[2]= sse4_c;
bb198e19
MN
2683 SET_CMP_FUNC(quant_psnr)
2684 SET_CMP_FUNC(rd)
2685 SET_CMP_FUNC(bit)
622348f9
MN
2686 c->vsad[0]= vsad16_c;
2687 c->vsad[4]= vsad_intra16_c;
7fb7f636 2688 c->vsad[5]= vsad_intra8_c;
622348f9
MN
2689 c->vsse[0]= vsse16_c;
2690 c->vsse[4]= vsse_intra16_c;
7fb7f636 2691 c->vsse[5]= vsse_intra8_c;
e6a2ac34
MN
2692 c->nsse[0]= nsse16_c;
2693 c->nsse[1]= nsse8_c;
26efc54e 2694
59006372
LM
2695 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2696
11f18faf
MN
2697 c->add_bytes= add_bytes_c;
2698 c->diff_bytes= diff_bytes_c;
3daa434a 2699 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
84705403 2700 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
73c6f598
NC
2701 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2702 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
3d2e8cce 2703 c->bswap_buf= bswap_buf;
381d37fd 2704 c->bswap16_buf = bswap16_buf;
42251a2a 2705
4052cbf1 2706 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
674eeb5f
AJ
2707 c->h263_h_loop_filter= h263_h_loop_filter_c;
2708 c->h263_v_loop_filter= h263_v_loop_filter_c;
eb75a698 2709 }
115329f1 2710
fdbbf2e0 2711 c->h261_loop_filter= h261_loop_filter_c;
115329f1 2712
364a1797
MN
2713 c->try_8x8basis= try_8x8basis_c;
2714 c->add_8x8basis= add_8x8basis_c;
11f18faf 2715
eb4825b5 2716 c->vector_fmul_reverse = vector_fmul_reverse_c;
952e8721 2717 c->vector_fmul_add = vector_fmul_add_c;
0a68cd87 2718 c->vector_clipf = vector_clipf_c;
88c0536a 2719 c->scalarproduct_int16 = scalarproduct_int16_c;
b1159ad9 2720 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
e6e98234 2721 c->apply_window_int16 = apply_window_int16_c;
6054cd25 2722 c->vector_clip_int32 = vector_clip_int32_c;
dafcbfe4 2723 c->scalarproduct_float = ff_scalarproduct_float_c;
53b57211 2724 c->butterflies_float = butterflies_float_c;
53b57211 2725
9686abb8 2726 c->shrink[0]= av_image_copy_plane;
54009d42
MN
2727 c->shrink[1]= ff_shrink22;
2728 c->shrink[2]= ff_shrink44;
2729 c->shrink[3]= ff_shrink88;
2730
19a0729b
OA
2731#undef FUNC
2732#undef FUNCC
2733#define FUNC(f, depth) f ## _ ## depth
2734#define FUNCC(f, depth) f ## _ ## depth ## _c
2735
/* Fill c->PFX_pixels_tab[IDX][0..3] with the plain, _x2, _y2 and _xy2
 * variants of PFX_pixels<NUM>; FUNCC appends the _<depth>_c suffix to each
 * name. The expansion has no trailing semicolon. */
2736#define dspfunc1(PFX, IDX, NUM, depth)\
2737 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
2738 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
2739 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
2740 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
2741
/* Fill the 16 entries of c->PFX_pixels_tab[IDX]: entry x + 4 * y (x, y in
 * 0..3) gets FUNCC(PFX<NUM>_mc<x><y>, depth), i.e. the name with the
 * _<depth>_c suffix appended. The expansion has no trailing semicolon. */
2742#define dspfunc2(PFX, IDX, NUM, depth)\
2743 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
2744 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
2745 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
2746 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
2747 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
2748 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
2749 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
2750 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
2751 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
2752 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
2753 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
2754 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
2755 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
2756 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
2757 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
2758 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
2759
2760
/* Install into c the function pointers listed below, selecting the C
 * implementations whose names carry the given bit depth (pasted in by FUNCC).
 * dct is a name suffix applied only to get_pixels, clear_block, clear_blocks,
 * add_pixels8 and add_pixels4; the other entries depend on depth alone.
 * The expansion ends in a semicolon of its own. */
5cc26009 2761#define BIT_DEPTH_FUNCS(depth, dct)\
874f1a90 2762 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
19a0729b 2763 c->draw_edges = FUNCC(draw_edges , depth);\
5cc26009
MR
2764 c->clear_block = FUNCC(clear_block ## dct , depth);\
2765 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
2766 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
2767 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
0e02b381 2768 c->put_no_rnd_pixels_l2 = FUNCC(put_no_rnd_pixels8_l2 , depth);\
19a0729b
OA
2769\
2770 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
2771 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
2772 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
2773 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
2774 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
2775 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
2776\
2777 dspfunc1(put , 0, 16, depth);\
2778 dspfunc1(put , 1, 8, depth);\
2779 dspfunc1(put , 2, 4, depth);\
2780 dspfunc1(put , 3, 2, depth);\
2781 dspfunc1(put_no_rnd, 0, 16, depth);\
2782 dspfunc1(put_no_rnd, 1, 8, depth);\
2783 dspfunc1(avg , 0, 16, depth);\
2784 dspfunc1(avg , 1, 8, depth);\
2785 dspfunc1(avg , 2, 4, depth);\
2786 dspfunc1(avg , 3, 2, depth);\
2787 dspfunc1(avg_no_rnd, 0, 16, depth);\
2788 dspfunc1(avg_no_rnd, 1, 8, depth);\
2789\
2790 dspfunc2(put_h264_qpel, 0, 16, depth);\
2791 dspfunc2(put_h264_qpel, 1, 8, depth);\
2792 dspfunc2(put_h264_qpel, 2, 4, depth);\
2793 dspfunc2(put_h264_qpel, 3, 2, depth);\
2794 dspfunc2(avg_h264_qpel, 0, 16, depth);\
2795 dspfunc2(avg_h264_qpel, 1, 8, depth);\
2796 dspfunc2(avg_h264_qpel, 2, 4, depth);
2797
a82beafd
MR
2798 switch (avctx->bits_per_raw_sample) {
2799 case 9:
5cc26009
MR
2800 if (c->dct_bits == 32) {
2801 BIT_DEPTH_FUNCS(9, _32);
2802 } else {
2803 BIT_DEPTH_FUNCS(9, _16);
2804 }
a82beafd
MR
2805 break;
2806 case 10:
5cc26009
MR
2807 if (c->dct_bits == 32) {
2808 BIT_DEPTH_FUNCS(10, _32);
2809 } else {
2810 BIT_DEPTH_FUNCS(10, _16);
2811 }
a82beafd
MR
2812 break;
2813 default:
5cc26009 2814 BIT_DEPTH_FUNCS(8, _16);
a82beafd 2815 break;
19a0729b
OA
2816 }
2817
2818
9cf0841e
MS
2819 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
2820 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
2821 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
2822 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
2823 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
9cf0841e
MS
2824 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
2825 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
43f1708f 2826
92fb52d9
RB
2827 ff_init_scantable_permutation(c->idct_permutation,
2828 c->idct_permutation_type);
57060b1e 2829}