dct-test: remove commented out code
[libav.git] / libavcodec / dct-test.c
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of Libav.
6 *
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
34
35 #include "libavutil/cpu.h"
36 #include "libavutil/common.h"
37 #include "libavutil/lfg.h"
38
39 #include "simple_idct.h"
40 #include "aandcttab.h"
41 #include "faandct.h"
42 #include "faanidct.h"
43 #include "x86/idct_xvid.h"
44 #include "dctref.h"
45
/* NOTE(review): presumably one of the project headers above redefines printf;
 * the macro is dropped so that this program can print its results -- confirm. */
#undef printf

/* IDCTs listed in the algos table when CONFIG_GPL is set */
void ff_mmx_idct(DCTELEM *data);
void ff_mmxext_idct(DCTELEM *data);

/* NOTE(review): declared but not referenced by any code in this file */
void odivx_idct_c(short *block);

// BFIN
void ff_bfin_idct(DCTELEM *block);
void ff_bfin_fdct(DCTELEM *block);

// ALTIVEC
void fdct_altivec(DCTELEM *block);
/* NOTE(review): only a forward AltiVec DCT is declared; the original note
 * here said there is no AltiVec IDCT routine to test. */

// ARM
void ff_j_rev_dct_arm(DCTELEM *data);
void ff_simple_idct_arm(DCTELEM *data);
void ff_simple_idct_armv5te(DCTELEM *data);
void ff_simple_idct_armv6(DCTELEM *data);
void ff_simple_idct_neon(DCTELEM *data);

// ALPHA (listed in the algos table under ARCH_ALPHA)
void ff_simple_idct_axp(DCTELEM *data);
69
/** Description of one transform implementation exercised by this test. */
struct algo {
    /* label printed in the result lines; a NULL name terminates the table */
    const char *name;
    /* direction of the transform; main() compares it with the -i selection */
    enum { FDCT, IDCT } is_idct;
    /* implementation under test, operating in place on 64 coefficients */
    void (* func) (DCTELEM *block);
    /* reference transform whose output the result is compared against */
    void (* ref) (DCTELEM *block);
    /* input permutation applied by dct_error() before calling func;
     * SCALE_PERM instead selects a rescaling of the output */
    enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
    /* AV_CPU_FLAG_* bits that must all be present in cpu_flags for main()
     * to run this entry; zero means no requirement */
    int mm_support;
};
78
/* Format used by the "FAAN" table entry: SCALE_PERM (output rescaled in
 * dct_error()) unless FAAN_POSTSCALE is defined, in which case NO_PERM. */
#ifndef FAAN_POSTSCALE
#define FAAN_SCALE SCALE_PERM
#else
#define FAAN_SCALE NO_PERM
#endif

/* CPU capability flags, assigned from av_get_cpu_flags() at the start of main() */
static int cpu_flags;
86
87 struct algo algos[] = {
88 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
89 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
90 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
91 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
92 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
93 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
94 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
95 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
96
97 #if HAVE_MMX
98 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX},
99 #if HAVE_MMX2
100 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX2},
101 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_SSE2},
102 #endif
103
104 #if CONFIG_GPL
105 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX},
106 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX2},
107 #endif
108 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX},
109 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX},
110 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX2},
111 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, AV_CPU_FLAG_SSE2},
112 #endif
113
114 #if HAVE_ALTIVEC
115 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_ALTIVEC},
116 #endif
117
118 #if ARCH_BFIN
119 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
120 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
121 #endif
122
123 #if ARCH_ARM
124 {"SIMPLE-ARM", 1, ff_simple_idct_arm, ff_ref_idct, NO_PERM },
125 {"INT-ARM", 1, ff_j_rev_dct_arm, ff_ref_idct, MMX_PERM },
126 #if HAVE_ARMV5TE
127 {"SIMPLE-ARMV5TE", 1, ff_simple_idct_armv5te, ff_ref_idct, NO_PERM },
128 #endif
129 #if HAVE_ARMV6
130 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
131 #endif
132 #if HAVE_NEON
133 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
134 #endif
135 #endif /* ARCH_ARM */
136
137 #if ARCH_ALPHA
138 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
139 #endif
140
141 { 0 }
142 };
143
/* Number of bits dropped by the final right shift of the SCALE_PERM
 * output rescaling in dct_error() */
#define AANSCALE_BITS 12

/* Clamp lookup table filled in main(): the first MAX_NEG_CROP entries are 0,
 * the next 256 hold 0..255 and the last MAX_NEG_CROP entries are 255.
 * NOTE(review): not static and never read in this file -- presumably
 * referenced by the codec code this test links against; confirm. */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
147
/**
 * Read the wall clock through gettimeofday().
 *
 * @return seconds and microseconds of the current time combined into a
 *         single microsecond count
 */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen before scaling so the seconds part cannot overflow */
    usec  = (int64_t)now.tv_sec;
    usec *= 1000000;
    usec += now.tv_usec;
    return usec;
}
154
/* number of generated blocks run through each accuracy test */
#define NB_ITS 20000
/* number of transform calls in one timing batch of the speed tests */
#define NB_ITS_SPEED 50000

/* Input permutation for MMX_PERM entries, filled in by idct_mmx_init():
 * natural-order coefficient i is stored at index idct_mmx_perm[i]. */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM entries: natural-order coefficient i
 * is stored at index idct_simple_mmx_perm[i]. */
static short idct_simple_mmx_perm[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column reordering used for SSE2_PERM entries: within each row of eight,
 * column c is stored at column idct_sse2_row_perm[c]. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
172
173 static void idct_mmx_init(void)
174 {
175 int i;
176
177 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
178 for (i = 0; i < 64; i++) {
179 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
180 }
181 }
182
/* Work buffers shared by dct_error() and idct248_error() */
/* block handed to the transform under test */
DECLARE_ALIGNED(16, static DCTELEM, block)[64];
/* generated input; dct_error() also passes it to the reference transform */
DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
/* copy of the generated input saved by dct_error(); not read in this file */
DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
186
/**
 * Execute the emms instruction when built with HAVE_MMX and cpu_flags
 * reports AV_CPU_FLAG_MMX; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & AV_CPU_FLAG_MMX))
        return;

    __asm__ volatile ("emms\n\t");
#endif
}
194
195 static void dct_error(const char *name, int is_idct,
196 void (*fdct_func)(DCTELEM *block),
197 void (*fdct_ref)(DCTELEM *block), int form, int test)
198 {
199 int it, i, scale;
200 int err_inf, v;
201 int64_t err2, ti, ti1, it1;
202 int64_t sysErr[64], sysErrMax=0;
203 int maxout=0;
204 int blockSumErrMax=0, blockSumErr;
205 AVLFG prng;
206
207 av_lfg_init(&prng, 1);
208
209 err_inf = 0;
210 err2 = 0;
211 for(i=0; i<64; i++) sysErr[i]=0;
212 for(it=0;it<NB_ITS;it++) {
213 for(i=0;i<64;i++)
214 block1[i] = 0;
215 switch(test){
216 case 0:
217 for(i=0;i<64;i++)
218 block1[i] = (av_lfg_get(&prng) % 512) -256;
219 if (is_idct){
220 ff_ref_fdct(block1);
221
222 for(i=0;i<64;i++)
223 block1[i]>>=3;
224 }
225 break;
226 case 1:{
227 int num = av_lfg_get(&prng) % 10 + 1;
228 for(i=0;i<num;i++)
229 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
230 }break;
231 case 2:
232 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
233 block1[63]= (block1[0]&1)^1;
234 break;
235 }
236
237 for(i=0; i<64; i++)
238 block_org[i]= block1[i];
239
240 if (form == MMX_PERM) {
241 for(i=0;i<64;i++)
242 block[idct_mmx_perm[i]] = block1[i];
243 } else if (form == MMX_SIMPLE_PERM) {
244 for(i=0;i<64;i++)
245 block[idct_simple_mmx_perm[i]] = block1[i];
246
247 } else if (form == SSE2_PERM) {
248 for(i=0; i<64; i++)
249 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
250 } else if (form == PARTTRANS_PERM) {
251 for(i=0; i<64; i++)
252 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
253 } else {
254 for(i=0; i<64; i++)
255 block[i]= block1[i];
256 }
257
258 fdct_func(block);
259 mmx_emms();
260
261 if (form == SCALE_PERM) {
262 for(i=0; i<64; i++) {
263 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
264 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
265 }
266 }
267
268 fdct_ref(block1);
269
270 blockSumErr=0;
271 for(i=0;i<64;i++) {
272 v = abs(block[i] - block1[i]);
273 if (v > err_inf)
274 err_inf = v;
275 err2 += v * v;
276 sysErr[i] += block[i] - block1[i];
277 blockSumErr += v;
278 if( abs(block[i])>maxout) maxout=abs(block[i]);
279 }
280 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
281 }
282 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
283
284 for(i=0; i<64; i++){
285 if(i%8==0) printf("\n");
286 printf("%7d ", (int)sysErr[i]);
287 }
288 printf("\n");
289
290 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
291 is_idct ? "IDCT" : "DCT",
292 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
293
294 /* speed test */
295 for(i=0;i<64;i++)
296 block1[i] = 0;
297 switch(test){
298 case 0:
299 for(i=0;i<64;i++)
300 block1[i] = av_lfg_get(&prng) % 512 -256;
301 if (is_idct){
302 ff_ref_fdct(block1);
303
304 for(i=0;i<64;i++)
305 block1[i]>>=3;
306 }
307 break;
308 case 1:{
309 case 2:
310 block1[0] = av_lfg_get(&prng) % 512 -256;
311 block1[1] = av_lfg_get(&prng) % 512 -256;
312 block1[2] = av_lfg_get(&prng) % 512 -256;
313 block1[3] = av_lfg_get(&prng) % 512 -256;
314 }break;
315 }
316
317 if (form == MMX_PERM) {
318 for(i=0;i<64;i++)
319 block[idct_mmx_perm[i]] = block1[i];
320 } else if(form == MMX_SIMPLE_PERM) {
321 for(i=0;i<64;i++)
322 block[idct_simple_mmx_perm[i]] = block1[i];
323 } else {
324 for(i=0; i<64; i++)
325 block[i]= block1[i];
326 }
327
328 ti = gettime();
329 it1 = 0;
330 do {
331 for(it=0;it<NB_ITS_SPEED;it++) {
332 for(i=0; i<64; i++)
333 block[i]= block1[i];
334 fdct_func(block);
335 }
336 it1 += NB_ITS_SPEED;
337 ti1 = gettime() - ti;
338 } while (ti1 < 1000000);
339 mmx_emms();
340
341 printf("%s %s: %0.1f kdct/s\n",
342 is_idct ? "IDCT" : "DCT",
343 name, (double)it1 * 1000.0 / (double)ti1);
344 }
345
/* 8x8 pixel outputs compared by idct248_error() */
/* written by the implementation under test */
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
/* written by idct248_ref() */
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
348
349 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
350 {
351 static int init;
352 static double c8[8][8];
353 static double c4[4][4];
354 double block1[64], block2[64], block3[64];
355 double s, sum, v;
356 int i, j, k;
357
358 if (!init) {
359 init = 1;
360
361 for(i=0;i<8;i++) {
362 sum = 0;
363 for(j=0;j<8;j++) {
364 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
365 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
366 sum += c8[i][j] * c8[i][j];
367 }
368 }
369
370 for(i=0;i<4;i++) {
371 sum = 0;
372 for(j=0;j<4;j++) {
373 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
374 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
375 sum += c4[i][j] * c4[i][j];
376 }
377 }
378 }
379
380 /* butterfly */
381 s = 0.5 * sqrt(2.0);
382 for(i=0;i<4;i++) {
383 for(j=0;j<8;j++) {
384 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
385 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
386 }
387 }
388
389 /* idct8 on lines */
390 for(i=0;i<8;i++) {
391 for(j=0;j<8;j++) {
392 sum = 0;
393 for(k=0;k<8;k++)
394 sum += c8[k][j] * block1[8*i+k];
395 block2[8*i+j] = sum;
396 }
397 }
398
399 /* idct4 */
400 for(i=0;i<8;i++) {
401 for(j=0;j<4;j++) {
402 /* top */
403 sum = 0;
404 for(k=0;k<4;k++)
405 sum += c4[k][j] * block2[8*(2*k)+i];
406 block3[8*(2*j)+i] = sum;
407
408 /* bottom */
409 sum = 0;
410 for(k=0;k<4;k++)
411 sum += c4[k][j] * block2[8*(2*k+1)+i];
412 block3[8*(2*j+1)+i] = sum;
413 }
414 }
415
416 /* clamp and store the result */
417 for(i=0;i<8;i++) {
418 for(j=0;j<8;j++) {
419 v = block3[8*i+j];
420 if (v < 0)
421 v = 0;
422 else if (v > 255)
423 v = 255;
424 dest[i * linesize + j] = (int)rint(v);
425 }
426 }
427 }
428
429 static void idct248_error(const char *name,
430 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
431 {
432 int it, i, it1, ti, ti1, err_max, v;
433
434 AVLFG prng;
435
436 av_lfg_init(&prng, 1);
437
438 /* just one test to see if code is correct (precision is less
439 important here) */
440 err_max = 0;
441 for(it=0;it<NB_ITS;it++) {
442
443 /* XXX: use forward transform to generate values */
444 for(i=0;i<64;i++)
445 block1[i] = av_lfg_get(&prng) % 256 - 128;
446 block1[0] += 1024;
447
448 for(i=0; i<64; i++)
449 block[i]= block1[i];
450 idct248_ref(img_dest1, 8, block);
451
452 for(i=0; i<64; i++)
453 block[i]= block1[i];
454 idct248_put(img_dest, 8, block);
455
456 for(i=0;i<64;i++) {
457 v = abs((int)img_dest[i] - (int)img_dest1[i]);
458 if (v == 255)
459 printf("%d %d\n", img_dest[i], img_dest1[i]);
460 if (v > err_max)
461 err_max = v;
462 }
463 }
464 printf("%s %s: err_inf=%d\n",
465 1 ? "IDCT248" : "DCT248",
466 name, err_max);
467
468 ti = gettime();
469 it1 = 0;
470 do {
471 for(it=0;it<NB_ITS_SPEED;it++) {
472 for(i=0; i<64; i++)
473 block[i]= block1[i];
474 idct248_put(img_dest, 8, block);
475 }
476 it1 += NB_ITS_SPEED;
477 ti1 = gettime() - ti;
478 } while (ti1 < 1000000);
479 mmx_emms();
480
481 printf("%s %s: %0.1f kdct/s\n",
482 1 ? "IDCT248" : "DCT248",
483 name, (double)it1 * 1000.0 / (double)ti1);
484 }
485
/** Print the command-line synopsis to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
}
495
496 int main(int argc, char **argv)
497 {
498 int test_idct = 0, test_248_dct = 0;
499 int c,i;
500 int test=1;
501 cpu_flags = av_get_cpu_flags();
502
503 ff_ref_dct_init();
504 idct_mmx_init();
505
506 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
507 for(i=0;i<MAX_NEG_CROP;i++) {
508 cropTbl[i] = 0;
509 cropTbl[i + MAX_NEG_CROP + 256] = 255;
510 }
511
512 for(;;) {
513 c = getopt(argc, argv, "ih4");
514 if (c == -1)
515 break;
516 switch(c) {
517 case 'i':
518 test_idct = 1;
519 break;
520 case '4':
521 test_248_dct = 1;
522 break;
523 default :
524 case 'h':
525 help();
526 return 0;
527 }
528 }
529
530 if(optind <argc) test= atoi(argv[optind]);
531
532 printf("ffmpeg DCT/IDCT test\n");
533
534 if (test_248_dct) {
535 idct248_error("SIMPLE-C", ff_simple_idct248_put);
536 } else {
537 for (i=0;algos[i].name;i++)
538 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
539 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
540 }
541 }
542 return 0;
543 }