Skip cmdutils_common_opts.h fragment in checkheaders
[libav.git] / libavcodec / dct-test.c
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file libavcodec/dct-test.c
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
34
35 #include "libavutil/common.h"
36 #include "libavutil/lfg.h"
37
38 #include "simple_idct.h"
39 #include "aandcttab.h"
40 #include "faandct.h"
41 #include "faanidct.h"
42 #include "x86/idct_xvid.h"
43 #include "dctref.h"
44
45 #undef printf
46
/**
 * Plain-memcpy stand-in for the fast_memcpy symbol.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  a, exactly as memcpy() returns it
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
48
49 void ff_mmx_idct(DCTELEM *data);
50 void ff_mmxext_idct(DCTELEM *data);
51
52 void odivx_idct_c(short *block);
53
54 // BFIN
55 void ff_bfin_idct(DCTELEM *block);
56 void ff_bfin_fdct(DCTELEM *block);
57
58 // ALTIVEC
59 void fdct_altivec(DCTELEM *block);
60 //void idct_altivec(DCTELEM *block);?? no routine
61
62 // ARM
63 void j_rev_dct_ARM(DCTELEM *data);
64 void simple_idct_ARM(DCTELEM *data);
65 void simple_idct_armv5te(DCTELEM *data);
66 void ff_simple_idct_armv6(DCTELEM *data);
67 void ff_simple_idct_neon(DCTELEM *data);
68
69 void ff_simple_idct_axp(DCTELEM *data);
70
71 struct algo {
72 const char *name;
73 enum { FDCT, IDCT } is_idct;
74 void (* func) (DCTELEM *block);
75 void (* ref) (DCTELEM *block);
76 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
77 int mm_support;
78 };
79
80 #ifndef FAAN_POSTSCALE
81 #define FAAN_SCALE SCALE_PERM
82 #else
83 #define FAAN_SCALE NO_PERM
84 #endif
85
86 static int cpu_flags;
87
88 struct algo algos[] = {
89 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
90 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
91 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
92 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
93 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
94 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
95 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
96 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
97
98 #if HAVE_MMX
99 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX},
100 #if HAVE_MMX2
101 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMX2},
102 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2},
103 #endif
104
105 #if CONFIG_GPL
106 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX},
107 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX2},
108 #endif
109 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
110 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX},
111 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMX2},
112 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
113 #endif
114
115 #if HAVE_ALTIVEC
116 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
117 #endif
118
119 #if ARCH_BFIN
120 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
121 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
122 #endif
123
124 #if ARCH_ARM
125 {"SIMPLE-ARM", 1, simple_idct_ARM, ff_ref_idct, NO_PERM },
126 {"INT-ARM", 1, j_rev_dct_ARM, ff_ref_idct, MMX_PERM },
127 #if HAVE_ARMV5TE
128 {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
129 #endif
130 #if HAVE_ARMV6
131 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
132 #endif
133 #if HAVE_NEON
134 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
135 #endif
136 #endif /* ARCH_ARM */
137
138 #if ARCH_ALPHA
139 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
140 #endif
141
142 { 0 }
143 };
144
145 #define AANSCALE_BITS 12
146
147 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
148
/**
 * Read the wall clock via gettimeofday().
 *
 * @return current time in microseconds
 */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen the seconds before scaling so the product cannot overflow */
    usec  = now.tv_sec;
    usec *= 1000000;
    usec += now.tv_usec;
    return usec;
}
155
/* Blocks checked per accuracy run / transform calls per timing batch. */
#define NB_ITS 20000
#define NB_ITS_SPEED 50000

/* Input permutation for MMX_PERM transforms; computed by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM transforms (natural index -> slot). */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
    0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
    0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
    0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
    0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};

/* Column order within each row for SSE2_PERM transforms. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };

/**
 * Fill idct_mmx_perm[]: the row (bits 3-5) is kept, and the three column
 * bits are rotated so that bit 0 moves to bit 2 and bits 1-2 move down.
 */
static void idct_mmx_init(void)
{
    int pos;

    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
    for (pos = 0; pos < 64; pos++) {
        int row    = pos & 0x38;
        int hi_col = (pos >> 1) & 3;
        int lo_col = (pos & 1) << 2;

        idct_mmx_perm[pos] = row | hi_col | lo_col;
    }
}
184
185 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
186 DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
187 DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
188
189 static inline void mmx_emms(void)
190 {
191 #if HAVE_MMX
192 if (cpu_flags & FF_MM_MMX)
193 __asm__ volatile ("emms\n\t");
194 #endif
195 }
196
197 static void dct_error(const char *name, int is_idct,
198 void (*fdct_func)(DCTELEM *block),
199 void (*fdct_ref)(DCTELEM *block), int form, int test)
200 {
201 int it, i, scale;
202 int err_inf, v;
203 int64_t err2, ti, ti1, it1;
204 int64_t sysErr[64], sysErrMax=0;
205 int maxout=0;
206 int blockSumErrMax=0, blockSumErr;
207 AVLFG prng;
208
209 av_lfg_init(&prng, 1);
210
211 err_inf = 0;
212 err2 = 0;
213 for(i=0; i<64; i++) sysErr[i]=0;
214 for(it=0;it<NB_ITS;it++) {
215 for(i=0;i<64;i++)
216 block1[i] = 0;
217 switch(test){
218 case 0:
219 for(i=0;i<64;i++)
220 block1[i] = (av_lfg_get(&prng) % 512) -256;
221 if (is_idct){
222 ff_ref_fdct(block1);
223
224 for(i=0;i<64;i++)
225 block1[i]>>=3;
226 }
227 break;
228 case 1:{
229 int num = av_lfg_get(&prng) % 10 + 1;
230 for(i=0;i<num;i++)
231 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
232 }break;
233 case 2:
234 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
235 block1[63]= (block1[0]&1)^1;
236 break;
237 }
238
239 #if 0 // simulate mismatch control
240 { int sum=0;
241 for(i=0;i<64;i++)
242 sum+=block1[i];
243
244 if((sum&1)==0) block1[63]^=1;
245 }
246 #endif
247
248 for(i=0; i<64; i++)
249 block_org[i]= block1[i];
250
251 if (form == MMX_PERM) {
252 for(i=0;i<64;i++)
253 block[idct_mmx_perm[i]] = block1[i];
254 } else if (form == MMX_SIMPLE_PERM) {
255 for(i=0;i<64;i++)
256 block[idct_simple_mmx_perm[i]] = block1[i];
257
258 } else if (form == SSE2_PERM) {
259 for(i=0; i<64; i++)
260 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
261 } else if (form == PARTTRANS_PERM) {
262 for(i=0; i<64; i++)
263 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
264 } else {
265 for(i=0; i<64; i++)
266 block[i]= block1[i];
267 }
268 #if 0 // simulate mismatch control for tested IDCT but not the ref
269 { int sum=0;
270 for(i=0;i<64;i++)
271 sum+=block[i];
272
273 if((sum&1)==0) block[63]^=1;
274 }
275 #endif
276
277 fdct_func(block);
278 mmx_emms();
279
280 if (form == SCALE_PERM) {
281 for(i=0; i<64; i++) {
282 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
283 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
284 }
285 }
286
287 fdct_ref(block1);
288
289 blockSumErr=0;
290 for(i=0;i<64;i++) {
291 v = abs(block[i] - block1[i]);
292 if (v > err_inf)
293 err_inf = v;
294 err2 += v * v;
295 sysErr[i] += block[i] - block1[i];
296 blockSumErr += v;
297 if( abs(block[i])>maxout) maxout=abs(block[i]);
298 }
299 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
300 #if 0 // print different matrix pairs
301 if(blockSumErr){
302 printf("\n");
303 for(i=0; i<64; i++){
304 if((i&7)==0) printf("\n");
305 printf("%4d ", block_org[i]);
306 }
307 for(i=0; i<64; i++){
308 if((i&7)==0) printf("\n");
309 printf("%4d ", block[i] - block1[i]);
310 }
311 }
312 #endif
313 }
314 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
315
316 #if 1 // dump systematic errors
317 for(i=0; i<64; i++){
318 if(i%8==0) printf("\n");
319 printf("%7d ", (int)sysErr[i]);
320 }
321 printf("\n");
322 #endif
323
324 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
325 is_idct ? "IDCT" : "DCT",
326 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
327 #if 1 //Speed test
328 /* speed test */
329 for(i=0;i<64;i++)
330 block1[i] = 0;
331 switch(test){
332 case 0:
333 for(i=0;i<64;i++)
334 block1[i] = av_lfg_get(&prng) % 512 -256;
335 if (is_idct){
336 ff_ref_fdct(block1);
337
338 for(i=0;i<64;i++)
339 block1[i]>>=3;
340 }
341 break;
342 case 1:{
343 case 2:
344 block1[0] = av_lfg_get(&prng) % 512 -256;
345 block1[1] = av_lfg_get(&prng) % 512 -256;
346 block1[2] = av_lfg_get(&prng) % 512 -256;
347 block1[3] = av_lfg_get(&prng) % 512 -256;
348 }break;
349 }
350
351 if (form == MMX_PERM) {
352 for(i=0;i<64;i++)
353 block[idct_mmx_perm[i]] = block1[i];
354 } else if(form == MMX_SIMPLE_PERM) {
355 for(i=0;i<64;i++)
356 block[idct_simple_mmx_perm[i]] = block1[i];
357 } else {
358 for(i=0; i<64; i++)
359 block[i]= block1[i];
360 }
361
362 ti = gettime();
363 it1 = 0;
364 do {
365 for(it=0;it<NB_ITS_SPEED;it++) {
366 for(i=0; i<64; i++)
367 block[i]= block1[i];
368 // memcpy(block, block1, sizeof(DCTELEM) * 64);
369 // do not memcpy especially not fastmemcpy because it does movntq !!!
370 fdct_func(block);
371 }
372 it1 += NB_ITS_SPEED;
373 ti1 = gettime() - ti;
374 } while (ti1 < 1000000);
375 mmx_emms();
376
377 printf("%s %s: %0.1f kdct/s\n",
378 is_idct ? "IDCT" : "DCT",
379 name, (double)it1 * 1000.0 / (double)ti1);
380 #endif
381 }
382
383 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
384 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
385
386 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
387 {
388 static int init;
389 static double c8[8][8];
390 static double c4[4][4];
391 double block1[64], block2[64], block3[64];
392 double s, sum, v;
393 int i, j, k;
394
395 if (!init) {
396 init = 1;
397
398 for(i=0;i<8;i++) {
399 sum = 0;
400 for(j=0;j<8;j++) {
401 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
402 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
403 sum += c8[i][j] * c8[i][j];
404 }
405 }
406
407 for(i=0;i<4;i++) {
408 sum = 0;
409 for(j=0;j<4;j++) {
410 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
411 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
412 sum += c4[i][j] * c4[i][j];
413 }
414 }
415 }
416
417 /* butterfly */
418 s = 0.5 * sqrt(2.0);
419 for(i=0;i<4;i++) {
420 for(j=0;j<8;j++) {
421 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
422 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
423 }
424 }
425
426 /* idct8 on lines */
427 for(i=0;i<8;i++) {
428 for(j=0;j<8;j++) {
429 sum = 0;
430 for(k=0;k<8;k++)
431 sum += c8[k][j] * block1[8*i+k];
432 block2[8*i+j] = sum;
433 }
434 }
435
436 /* idct4 */
437 for(i=0;i<8;i++) {
438 for(j=0;j<4;j++) {
439 /* top */
440 sum = 0;
441 for(k=0;k<4;k++)
442 sum += c4[k][j] * block2[8*(2*k)+i];
443 block3[8*(2*j)+i] = sum;
444
445 /* bottom */
446 sum = 0;
447 for(k=0;k<4;k++)
448 sum += c4[k][j] * block2[8*(2*k+1)+i];
449 block3[8*(2*j+1)+i] = sum;
450 }
451 }
452
453 /* clamp and store the result */
454 for(i=0;i<8;i++) {
455 for(j=0;j<8;j++) {
456 v = block3[8*i+j];
457 if (v < 0)
458 v = 0;
459 else if (v > 255)
460 v = 255;
461 dest[i * linesize + j] = (int)rint(v);
462 }
463 }
464 }
465
466 static void idct248_error(const char *name,
467 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
468 {
469 int it, i, it1, ti, ti1, err_max, v;
470
471 AVLFG prng;
472
473 av_lfg_init(&prng, 1);
474
475 /* just one test to see if code is correct (precision is less
476 important here) */
477 err_max = 0;
478 for(it=0;it<NB_ITS;it++) {
479
480 /* XXX: use forward transform to generate values */
481 for(i=0;i<64;i++)
482 block1[i] = av_lfg_get(&prng) % 256 - 128;
483 block1[0] += 1024;
484
485 for(i=0; i<64; i++)
486 block[i]= block1[i];
487 idct248_ref(img_dest1, 8, block);
488
489 for(i=0; i<64; i++)
490 block[i]= block1[i];
491 idct248_put(img_dest, 8, block);
492
493 for(i=0;i<64;i++) {
494 v = abs((int)img_dest[i] - (int)img_dest1[i]);
495 if (v == 255)
496 printf("%d %d\n", img_dest[i], img_dest1[i]);
497 if (v > err_max)
498 err_max = v;
499 }
500 #if 0
501 printf("ref=\n");
502 for(i=0;i<8;i++) {
503 int j;
504 for(j=0;j<8;j++) {
505 printf(" %3d", img_dest1[i*8+j]);
506 }
507 printf("\n");
508 }
509
510 printf("out=\n");
511 for(i=0;i<8;i++) {
512 int j;
513 for(j=0;j<8;j++) {
514 printf(" %3d", img_dest[i*8+j]);
515 }
516 printf("\n");
517 }
518 #endif
519 }
520 printf("%s %s: err_inf=%d\n",
521 1 ? "IDCT248" : "DCT248",
522 name, err_max);
523
524 ti = gettime();
525 it1 = 0;
526 do {
527 for(it=0;it<NB_ITS_SPEED;it++) {
528 for(i=0; i<64; i++)
529 block[i]= block1[i];
530 // memcpy(block, block1, sizeof(DCTELEM) * 64);
531 // do not memcpy especially not fastmemcpy because it does movntq !!!
532 idct248_put(img_dest, 8, block);
533 }
534 it1 += NB_ITS_SPEED;
535 ti1 = gettime() - ti;
536 } while (ti1 < 1000000);
537 mmx_emms();
538
539 printf("%s %s: %0.1f kdct/s\n",
540 1 ? "IDCT248" : "DCT248",
541 name, (double)it1 * 1000.0 / (double)ti1);
542 }
543
/**
 * Print the command-line usage summary to standard output.
 */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
}
553
554 int main(int argc, char **argv)
555 {
556 int test_idct = 0, test_248_dct = 0;
557 int c,i;
558 int test=1;
559 cpu_flags = mm_support();
560
561 ff_ref_dct_init();
562 idct_mmx_init();
563
564 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
565 for(i=0;i<MAX_NEG_CROP;i++) {
566 cropTbl[i] = 0;
567 cropTbl[i + MAX_NEG_CROP + 256] = 255;
568 }
569
570 for(;;) {
571 c = getopt(argc, argv, "ih4");
572 if (c == -1)
573 break;
574 switch(c) {
575 case 'i':
576 test_idct = 1;
577 break;
578 case '4':
579 test_248_dct = 1;
580 break;
581 default :
582 case 'h':
583 help();
584 return 0;
585 }
586 }
587
588 if(optind <argc) test= atoi(argv[optind]);
589
590 printf("ffmpeg DCT/IDCT test\n");
591
592 if (test_248_dct) {
593 idct248_error("SIMPLE-C", ff_simple_idct248_put);
594 } else {
595 for (i=0;algos[i].name;i++)
596 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
597 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
598 }
599 }
600 return 0;
601 }