217feb928c6f772e7d8dad32bc4fcb9b7ba1ee5a
[libav.git] / libavcodec / dct-test.c
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file dct-test.c
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
34
35 #include "libavutil/common.h"
36
37 #include "simple_idct.h"
38 #include "faandct.h"
39 #include "faanidct.h"
40 #include "i386/idct_xvid.h"
41
42 #undef printf
43 #undef random
44
45 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
46
47 /* reference fdct/idct */
48 extern void fdct(DCTELEM *block);
49 extern void idct(DCTELEM *block);
50 extern void init_fdct();
51
52 extern void ff_mmx_idct(DCTELEM *data);
53 extern void ff_mmxext_idct(DCTELEM *data);
54
55 extern void odivx_idct_c (short *block);
56
57 // BFIN
58 extern void ff_bfin_idct (DCTELEM *block) ;
59 extern void ff_bfin_fdct (DCTELEM *block) ;
60
61 // ALTIVEC
62 extern void fdct_altivec (DCTELEM *block);
63 //extern void idct_altivec (DCTELEM *block);?? no routine
64
65
66 struct algo {
67 const char *name;
68 enum { FDCT, IDCT } is_idct;
69 void (* func) (DCTELEM *block);
70 void (* ref) (DCTELEM *block);
71 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
72 int mm_support;
73 };
74
75 #ifndef FAAN_POSTSCALE
76 #define FAAN_SCALE SCALE_PERM
77 #else
78 #define FAAN_SCALE NO_PERM
79 #endif
80
81 static int cpu_flags;
82
83 struct algo algos[] = {
84 {"REF-DBL", 0, fdct, fdct, NO_PERM},
85 {"FAAN", 0, ff_faandct, fdct, FAAN_SCALE},
86 {"FAANI", 1, ff_faanidct, idct, NO_PERM},
87 {"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM},
88 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM},
89 {"REF-DBL", 1, idct, idct, NO_PERM},
90 {"INT", 1, j_rev_dct, idct, MMX_PERM},
91 {"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM},
92
93 #ifdef HAVE_MMX
94 {"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, MM_MMX},
95 #ifdef HAVE_MMX2
96 {"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, MM_MMXEXT},
97 #endif
98
99 #ifdef CONFIG_GPL
100 {"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, MM_MMX},
101 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, MM_MMXEXT},
102 #endif
103 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, MM_MMX},
104 {"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, MM_MMX},
105 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, MM_MMXEXT},
106 {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, MM_SSE2},
107 #endif
108
109 #ifdef HAVE_ALTIVEC
110 {"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, MM_ALTIVEC},
111 #endif
112
113 #ifdef ARCH_BFIN
114 {"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM},
115 {"BFINidct", 1, ff_bfin_idct, idct, NO_PERM},
116 #endif
117
118 { 0 }
119 };
120
121 #define AANSCALE_BITS 12
122 static const unsigned short aanscales[64] = {
123 /* precomputed values scaled up by 14 bits */
124 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
125 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
126 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
127 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
128 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
129 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
130 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
131 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
132 };
133
134 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
135
136 int64_t gettime(void)
137 {
138 struct timeval tv;
139 gettimeofday(&tv,NULL);
140 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
141 }
142
143 #define NB_ITS 20000
144 #define NB_ITS_SPEED 50000
145
146 static short idct_mmx_perm[64];
147
148 static short idct_simple_mmx_perm[64]={
149 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
150 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
151 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
152 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
153 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
154 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
155 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
156 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
157 };
158
159 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
160
161 void idct_mmx_init(void)
162 {
163 int i;
164
165 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
166 for (i = 0; i < 64; i++) {
167 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
168 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
169 }
170 }
171
172 static DCTELEM block[64] __attribute__ ((aligned (16)));
173 static DCTELEM block1[64] __attribute__ ((aligned (8)));
174 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
175
176 static inline void mmx_emms(void)
177 {
178 #ifdef HAVE_MMX
179 if (cpu_flags & MM_MMX)
180 __asm__ volatile ("emms\n\t");
181 #endif
182 }
183
184 void dct_error(const char *name, int is_idct,
185 void (*fdct_func)(DCTELEM *block),
186 void (*fdct_ref)(DCTELEM *block), int form, int test)
187 {
188 int it, i, scale;
189 int err_inf, v;
190 int64_t err2, ti, ti1, it1;
191 int64_t sysErr[64], sysErrMax=0;
192 int maxout=0;
193 int blockSumErrMax=0, blockSumErr;
194
195 srandom(0);
196
197 err_inf = 0;
198 err2 = 0;
199 for(i=0; i<64; i++) sysErr[i]=0;
200 for(it=0;it<NB_ITS;it++) {
201 for(i=0;i<64;i++)
202 block1[i] = 0;
203 switch(test){
204 case 0:
205 for(i=0;i<64;i++)
206 block1[i] = (random() % 512) -256;
207 if (is_idct){
208 fdct(block1);
209
210 for(i=0;i<64;i++)
211 block1[i]>>=3;
212 }
213 break;
214 case 1:{
215 int num= (random()%10)+1;
216 for(i=0;i<num;i++)
217 block1[random()%64] = (random() % 512) -256;
218 }break;
219 case 2:
220 block1[0]= (random()%4096)-2048;
221 block1[63]= (block1[0]&1)^1;
222 break;
223 }
224
225 #if 0 // simulate mismatch control
226 { int sum=0;
227 for(i=0;i<64;i++)
228 sum+=block1[i];
229
230 if((sum&1)==0) block1[63]^=1;
231 }
232 #endif
233
234 for(i=0; i<64; i++)
235 block_org[i]= block1[i];
236
237 if (form == MMX_PERM) {
238 for(i=0;i<64;i++)
239 block[idct_mmx_perm[i]] = block1[i];
240 } else if (form == MMX_SIMPLE_PERM) {
241 for(i=0;i<64;i++)
242 block[idct_simple_mmx_perm[i]] = block1[i];
243
244 } else if (form == SSE2_PERM) {
245 for(i=0; i<64; i++)
246 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
247 } else {
248 for(i=0; i<64; i++)
249 block[i]= block1[i];
250 }
251 #if 0 // simulate mismatch control for tested IDCT but not the ref
252 { int sum=0;
253 for(i=0;i<64;i++)
254 sum+=block[i];
255
256 if((sum&1)==0) block[63]^=1;
257 }
258 #endif
259
260 fdct_func(block);
261 mmx_emms();
262
263 if (form == SCALE_PERM) {
264 for(i=0; i<64; i++) {
265 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
266 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
267 }
268 }
269
270 fdct_ref(block1);
271
272 blockSumErr=0;
273 for(i=0;i<64;i++) {
274 v = abs(block[i] - block1[i]);
275 if (v > err_inf)
276 err_inf = v;
277 err2 += v * v;
278 sysErr[i] += block[i] - block1[i];
279 blockSumErr += v;
280 if( abs(block[i])>maxout) maxout=abs(block[i]);
281 }
282 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
283 #if 0 // print different matrix pairs
284 if(blockSumErr){
285 printf("\n");
286 for(i=0; i<64; i++){
287 if((i&7)==0) printf("\n");
288 printf("%4d ", block_org[i]);
289 }
290 for(i=0; i<64; i++){
291 if((i&7)==0) printf("\n");
292 printf("%4d ", block[i] - block1[i]);
293 }
294 }
295 #endif
296 }
297 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
298
299 #if 1 // dump systematic errors
300 for(i=0; i<64; i++){
301 if(i%8==0) printf("\n");
302 printf("%5d ", (int)sysErr[i]);
303 }
304 printf("\n");
305 #endif
306
307 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
308 is_idct ? "IDCT" : "DCT",
309 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
310 #if 1 //Speed test
311 /* speed test */
312 for(i=0;i<64;i++)
313 block1[i] = 0;
314 switch(test){
315 case 0:
316 for(i=0;i<64;i++)
317 block1[i] = (random() % 512) -256;
318 if (is_idct){
319 fdct(block1);
320
321 for(i=0;i<64;i++)
322 block1[i]>>=3;
323 }
324 break;
325 case 1:{
326 case 2:
327 block1[0] = (random() % 512) -256;
328 block1[1] = (random() % 512) -256;
329 block1[2] = (random() % 512) -256;
330 block1[3] = (random() % 512) -256;
331 }break;
332 }
333
334 if (form == MMX_PERM) {
335 for(i=0;i<64;i++)
336 block[idct_mmx_perm[i]] = block1[i];
337 } else if(form == MMX_SIMPLE_PERM) {
338 for(i=0;i<64;i++)
339 block[idct_simple_mmx_perm[i]] = block1[i];
340 } else {
341 for(i=0; i<64; i++)
342 block[i]= block1[i];
343 }
344
345 ti = gettime();
346 it1 = 0;
347 do {
348 for(it=0;it<NB_ITS_SPEED;it++) {
349 for(i=0; i<64; i++)
350 block[i]= block1[i];
351 // memcpy(block, block1, sizeof(DCTELEM) * 64);
352 // do not memcpy especially not fastmemcpy because it does movntq !!!
353 fdct_func(block);
354 }
355 it1 += NB_ITS_SPEED;
356 ti1 = gettime() - ti;
357 } while (ti1 < 1000000);
358 mmx_emms();
359
360 printf("%s %s: %0.1f kdct/s\n",
361 is_idct ? "IDCT" : "DCT",
362 name, (double)it1 * 1000.0 / (double)ti1);
363 #endif
364 }
365
366 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
367 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
368
369 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
370 {
371 static int init;
372 static double c8[8][8];
373 static double c4[4][4];
374 double block1[64], block2[64], block3[64];
375 double s, sum, v;
376 int i, j, k;
377
378 if (!init) {
379 init = 1;
380
381 for(i=0;i<8;i++) {
382 sum = 0;
383 for(j=0;j<8;j++) {
384 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
385 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
386 sum += c8[i][j] * c8[i][j];
387 }
388 }
389
390 for(i=0;i<4;i++) {
391 sum = 0;
392 for(j=0;j<4;j++) {
393 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
394 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
395 sum += c4[i][j] * c4[i][j];
396 }
397 }
398 }
399
400 /* butterfly */
401 s = 0.5 * sqrt(2.0);
402 for(i=0;i<4;i++) {
403 for(j=0;j<8;j++) {
404 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
405 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
406 }
407 }
408
409 /* idct8 on lines */
410 for(i=0;i<8;i++) {
411 for(j=0;j<8;j++) {
412 sum = 0;
413 for(k=0;k<8;k++)
414 sum += c8[k][j] * block1[8*i+k];
415 block2[8*i+j] = sum;
416 }
417 }
418
419 /* idct4 */
420 for(i=0;i<8;i++) {
421 for(j=0;j<4;j++) {
422 /* top */
423 sum = 0;
424 for(k=0;k<4;k++)
425 sum += c4[k][j] * block2[8*(2*k)+i];
426 block3[8*(2*j)+i] = sum;
427
428 /* bottom */
429 sum = 0;
430 for(k=0;k<4;k++)
431 sum += c4[k][j] * block2[8*(2*k+1)+i];
432 block3[8*(2*j+1)+i] = sum;
433 }
434 }
435
436 /* clamp and store the result */
437 for(i=0;i<8;i++) {
438 for(j=0;j<8;j++) {
439 v = block3[8*i+j];
440 if (v < 0)
441 v = 0;
442 else if (v > 255)
443 v = 255;
444 dest[i * linesize + j] = (int)rint(v);
445 }
446 }
447 }
448
449 void idct248_error(const char *name,
450 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
451 {
452 int it, i, it1, ti, ti1, err_max, v;
453
454 srandom(0);
455
456 /* just one test to see if code is correct (precision is less
457 important here) */
458 err_max = 0;
459 for(it=0;it<NB_ITS;it++) {
460
461 /* XXX: use forward transform to generate values */
462 for(i=0;i<64;i++)
463 block1[i] = (random() % 256) - 128;
464 block1[0] += 1024;
465
466 for(i=0; i<64; i++)
467 block[i]= block1[i];
468 idct248_ref(img_dest1, 8, block);
469
470 for(i=0; i<64; i++)
471 block[i]= block1[i];
472 idct248_put(img_dest, 8, block);
473
474 for(i=0;i<64;i++) {
475 v = abs((int)img_dest[i] - (int)img_dest1[i]);
476 if (v == 255)
477 printf("%d %d\n", img_dest[i], img_dest1[i]);
478 if (v > err_max)
479 err_max = v;
480 }
481 #if 0
482 printf("ref=\n");
483 for(i=0;i<8;i++) {
484 int j;
485 for(j=0;j<8;j++) {
486 printf(" %3d", img_dest1[i*8+j]);
487 }
488 printf("\n");
489 }
490
491 printf("out=\n");
492 for(i=0;i<8;i++) {
493 int j;
494 for(j=0;j<8;j++) {
495 printf(" %3d", img_dest[i*8+j]);
496 }
497 printf("\n");
498 }
499 #endif
500 }
501 printf("%s %s: err_inf=%d\n",
502 1 ? "IDCT248" : "DCT248",
503 name, err_max);
504
505 ti = gettime();
506 it1 = 0;
507 do {
508 for(it=0;it<NB_ITS_SPEED;it++) {
509 for(i=0; i<64; i++)
510 block[i]= block1[i];
511 // memcpy(block, block1, sizeof(DCTELEM) * 64);
512 // do not memcpy especially not fastmemcpy because it does movntq !!!
513 idct248_put(img_dest, 8, block);
514 }
515 it1 += NB_ITS_SPEED;
516 ti1 = gettime() - ti;
517 } while (ti1 < 1000000);
518 mmx_emms();
519
520 printf("%s %s: %0.1f kdct/s\n",
521 1 ? "IDCT248" : "DCT248",
522 name, (double)it1 * 1000.0 / (double)ti1);
523 }
524
525 void help(void)
526 {
527 printf("dct-test [-i] [<test-number>]\n"
528 "test-number 0 -> test with random matrixes\n"
529 " 1 -> test with random sparse matrixes\n"
530 " 2 -> do 3. test from mpeg4 std\n"
531 "-i test IDCT implementations\n"
532 "-4 test IDCT248 implementations\n");
533 }
534
535 int main(int argc, char **argv)
536 {
537 int test_idct = 0, test_248_dct = 0;
538 int c,i;
539 int test=1;
540 cpu_flags = mm_support();
541
542 init_fdct();
543 idct_mmx_init();
544
545 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
546 for(i=0;i<MAX_NEG_CROP;i++) {
547 cropTbl[i] = 0;
548 cropTbl[i + MAX_NEG_CROP + 256] = 255;
549 }
550
551 for(;;) {
552 c = getopt(argc, argv, "ih4");
553 if (c == -1)
554 break;
555 switch(c) {
556 case 'i':
557 test_idct = 1;
558 break;
559 case '4':
560 test_248_dct = 1;
561 break;
562 default :
563 case 'h':
564 help();
565 return 0;
566 }
567 }
568
569 if(optind <argc) test= atoi(argv[optind]);
570
571 printf("ffmpeg DCT/IDCT test\n");
572
573 if (test_248_dct) {
574 idct248_error("SIMPLE-C", ff_simple_idct248_put);
575 } else {
576 for (i=0;algos[i].name;i++)
577 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
578 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
579 }
580 }
581 return 0;
582 }