dct-test: remove write-only variable
[libav.git] / libavcodec / dct-test.c
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of Libav.
6 *
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
34
35 #include "libavutil/cpu.h"
36 #include "libavutil/common.h"
37 #include "libavutil/lfg.h"
38
39 #include "simple_idct.h"
40 #include "aandcttab.h"
41 #include "faandct.h"
42 #include "faanidct.h"
43 #include "x86/idct_xvid.h"
44 #include "dctref.h"
45
46 #undef printf
47
48 void ff_mmx_idct(DCTELEM *data);
49 void ff_mmxext_idct(DCTELEM *data);
50
51 void odivx_idct_c(short *block);
52
53 // BFIN
54 void ff_bfin_idct(DCTELEM *block);
55 void ff_bfin_fdct(DCTELEM *block);
56
57 // ALTIVEC
58 void fdct_altivec(DCTELEM *block);
59 //void idct_altivec(DCTELEM *block);?? no routine
60
61 // ARM
62 void ff_j_rev_dct_arm(DCTELEM *data);
63 void ff_simple_idct_arm(DCTELEM *data);
64 void ff_simple_idct_armv5te(DCTELEM *data);
65 void ff_simple_idct_armv6(DCTELEM *data);
66 void ff_simple_idct_neon(DCTELEM *data);
67
68 void ff_simple_idct_axp(DCTELEM *data);
69
70 struct algo {
71 const char *name;
72 void (*func)(DCTELEM *block);
73 enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
74 SSE2_PERM, PARTTRANS_PERM } format;
75 int mm_support;
76 int nonspec;
77 };
78
79 #ifndef FAAN_POSTSCALE
80 #define FAAN_SCALE SCALE_PERM
81 #else
82 #define FAAN_SCALE NO_PERM
83 #endif
84
85 static int cpu_flags;
86
87 static const struct algo fdct_tab[] = {
88 { "REF-DBL", ff_ref_fdct, NO_PERM },
89 { "FAAN", ff_faandct, FAAN_SCALE },
90 { "IJG-AAN-INT", fdct_ifast, SCALE_PERM },
91 { "IJG-LLM-INT", ff_jpeg_fdct_islow, NO_PERM },
92
93 #if HAVE_MMX
94 { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
95 { "MMX2", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 },
96 { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
97 #endif
98
99 #if HAVE_ALTIVEC
100 { "altivecfdct", fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
101 #endif
102
103 #if ARCH_BFIN
104 { "BFINfdct", ff_bfin_fdct, NO_PERM },
105 #endif
106
107 { 0 }
108 };
109
110 static const struct algo idct_tab[] = {
111 { "FAANI", ff_faanidct, NO_PERM },
112 { "REF-DBL", ff_ref_idct, NO_PERM },
113 { "INT", j_rev_dct, MMX_PERM },
114 { "SIMPLE-C", ff_simple_idct, NO_PERM },
115
116 #if HAVE_MMX
117 #if CONFIG_GPL
118 { "LIBMPEG2-MMX", ff_mmx_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1 },
119 { "LIBMPEG2-MMX2", ff_mmxext_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1 },
120 #endif
121 { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
122 { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
123 { "XVID-MMX2", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMX2, 1 },
124 { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
125 #endif
126
127 #if ARCH_BFIN
128 { "BFINidct", ff_bfin_idct, NO_PERM },
129 #endif
130
131 #if ARCH_ARM
132 { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
133 { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
134 #endif
135 #if HAVE_ARMV5TE
136 { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM },
137 #endif
138 #if HAVE_ARMV6
139 { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM },
140 #endif
141 #if HAVE_NEON
142 { "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM },
143 #endif
144
145 #if ARCH_ALPHA
146 { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
147 #endif
148
149 { 0 }
150 };
151
152 #define AANSCALE_BITS 12
153
154 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
155
/* Current time of day as reported by gettimeofday(), in microseconds. */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);
    /* widen before multiplying so the seconds-to-microseconds
     * conversion is carried out in 64 bits */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
162
163 #define NB_ITS 20000
164 #define NB_ITS_SPEED 50000
165
/* Input permutation for MMX_PERM implementations; filled in by
 * idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM implementations. */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column reordering applied within each row for SSE2_PERM
 * implementations. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };

/* Build idct_mmx_perm: the mmx/mmxext idct takes its input in a
 * reordered layout, so each index keeps its row bits (0x38) while
 * the three column bits are rotated. */
static void idct_mmx_init(void)
{
    int n;

    for (n = 0; n < 64; n++) {
        int row     = n & 0x38;
        int col_hi  = (n & 6) >> 1;   /* column bits 2..1 move down to 1..0 */
        int col_lo  = (n & 1) << 2;   /* column bit 0 moves up to bit 2 */

        idct_mmx_perm[n] = row | col_hi | col_lo;
    }
}
190
191 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
192 DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
193
/* Issue the x86 "emms" instruction when the build has MMX support and
 * the detected CPU flags include MMX; does nothing otherwise. */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & AV_CPU_FLAG_MMX))
        return;
    __asm__ volatile ("emms\n\t");
#endif
}
201
202 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
203 {
204 void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
205 int it, i, scale;
206 int err_inf, v;
207 int64_t err2, ti, ti1, it1, err_sum = 0;
208 int64_t sysErr[64], sysErrMax = 0;
209 int maxout = 0;
210 int blockSumErrMax = 0, blockSumErr;
211 AVLFG prng;
212 double omse, ome;
213 int spec_err;
214
215 av_lfg_init(&prng, 1);
216
217 err_inf = 0;
218 err2 = 0;
219 for (i = 0; i < 64; i++)
220 sysErr[i] = 0;
221 for (it = 0; it < NB_ITS; it++) {
222 for (i = 0; i < 64; i++)
223 block1[i] = 0;
224 switch (test) {
225 case 0:
226 for (i = 0; i < 64; i++)
227 block1[i] = (av_lfg_get(&prng) % 512) - 256;
228 if (is_idct) {
229 ff_ref_fdct(block1);
230 for (i = 0; i < 64; i++)
231 block1[i] >>= 3;
232 }
233 break;
234 case 1: {
235 int num = av_lfg_get(&prng) % 10 + 1;
236 for (i = 0; i < num; i++)
237 block1[av_lfg_get(&prng) % 64] =
238 av_lfg_get(&prng) % 512 - 256;
239 }
240 break;
241 case 2:
242 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
243 block1[63] = (block1[0] & 1) ^ 1;
244 break;
245 }
246
247 if (dct->format == MMX_PERM) {
248 for (i = 0; i < 64; i++)
249 block[idct_mmx_perm[i]] = block1[i];
250 } else if (dct->format == MMX_SIMPLE_PERM) {
251 for (i = 0; i < 64; i++)
252 block[idct_simple_mmx_perm[i]] = block1[i];
253 } else if (dct->format == SSE2_PERM) {
254 for (i = 0; i < 64; i++)
255 block[(i & 0x38) | idct_sse2_row_perm[i & 7]] = block1[i];
256 } else if (dct->format == PARTTRANS_PERM) {
257 for (i = 0; i < 64; i++)
258 block[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = block1[i];
259 } else {
260 for (i = 0; i < 64; i++)
261 block[i] = block1[i];
262 }
263
264 dct->func(block);
265 mmx_emms();
266
267 if (dct->format == SCALE_PERM) {
268 for (i = 0; i < 64; i++) {
269 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
270 block[i] = (block[i] * scale) >> AANSCALE_BITS;
271 }
272 }
273
274 ref(block1);
275
276 blockSumErr = 0;
277 for (i = 0; i < 64; i++) {
278 int err = block[i] - block1[i];
279 err_sum += err;
280 v = abs(err);
281 if (v > err_inf)
282 err_inf = v;
283 err2 += v * v;
284 sysErr[i] += block[i] - block1[i];
285 blockSumErr += v;
286 if (abs(block[i]) > maxout)
287 maxout = abs(block[i]);
288 }
289 if (blockSumErrMax < blockSumErr)
290 blockSumErrMax = blockSumErr;
291 }
292 for (i = 0; i < 64; i++)
293 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
294
295 for (i = 0; i < 64; i++) {
296 if (i % 8 == 0)
297 printf("\n");
298 printf("%7d ", (int) sysErr[i]);
299 }
300 printf("\n");
301
302 omse = (double) err2 / NB_ITS / 64;
303 ome = (double) err_sum / NB_ITS / 64;
304
305 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
306
307 printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
308 is_idct ? "IDCT" : "DCT", dct->name, err_inf,
309 omse, ome, (double) sysErrMax / NB_ITS,
310 maxout, blockSumErrMax);
311
312 if (spec_err && !dct->nonspec)
313 return 1;
314
315 if (!speed)
316 return 0;
317
318 /* speed test */
319 for (i = 0; i < 64; i++)
320 block1[i] = 0;
321
322 switch (test) {
323 case 0:
324 for (i = 0; i < 64; i++)
325 block1[i] = av_lfg_get(&prng) % 512 - 256;
326 if (is_idct) {
327 ff_ref_fdct(block1);
328 for (i = 0; i < 64; i++)
329 block1[i] >>= 3;
330 }
331 break;
332 case 1:
333 case 2:
334 block1[0] = av_lfg_get(&prng) % 512 - 256;
335 block1[1] = av_lfg_get(&prng) % 512 - 256;
336 block1[2] = av_lfg_get(&prng) % 512 - 256;
337 block1[3] = av_lfg_get(&prng) % 512 - 256;
338 break;
339 }
340
341 if (dct->format == MMX_PERM) {
342 for (i = 0; i < 64; i++)
343 block[idct_mmx_perm[i]] = block1[i];
344 } else if (dct->format == MMX_SIMPLE_PERM) {
345 for (i = 0; i < 64; i++)
346 block[idct_simple_mmx_perm[i]] = block1[i];
347 } else {
348 for (i = 0; i < 64; i++)
349 block[i] = block1[i];
350 }
351
352 ti = gettime();
353 it1 = 0;
354 do {
355 for (it = 0; it < NB_ITS_SPEED; it++) {
356 for (i = 0; i < 64; i++)
357 block[i] = block1[i];
358 dct->func(block);
359 }
360 it1 += NB_ITS_SPEED;
361 ti1 = gettime() - ti;
362 } while (ti1 < 1000000);
363 mmx_emms();
364
365 printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
366 (double) it1 * 1000.0 / (double) ti1);
367
368 return 0;
369 }
370
371 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
372 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
373
374 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
375 {
376 static int init;
377 static double c8[8][8];
378 static double c4[4][4];
379 double block1[64], block2[64], block3[64];
380 double s, sum, v;
381 int i, j, k;
382
383 if (!init) {
384 init = 1;
385
386 for (i = 0; i < 8; i++) {
387 sum = 0;
388 for (j = 0; j < 8; j++) {
389 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
390 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
391 sum += c8[i][j] * c8[i][j];
392 }
393 }
394
395 for (i = 0; i < 4; i++) {
396 sum = 0;
397 for (j = 0; j < 4; j++) {
398 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
399 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
400 sum += c4[i][j] * c4[i][j];
401 }
402 }
403 }
404
405 /* butterfly */
406 s = 0.5 * sqrt(2.0);
407 for (i = 0; i < 4; i++) {
408 for (j = 0; j < 8; j++) {
409 block1[8 * (2 * i) + j] =
410 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
411 block1[8 * (2 * i + 1) + j] =
412 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
413 }
414 }
415
416 /* idct8 on lines */
417 for (i = 0; i < 8; i++) {
418 for (j = 0; j < 8; j++) {
419 sum = 0;
420 for (k = 0; k < 8; k++)
421 sum += c8[k][j] * block1[8 * i + k];
422 block2[8 * i + j] = sum;
423 }
424 }
425
426 /* idct4 */
427 for (i = 0; i < 8; i++) {
428 for (j = 0; j < 4; j++) {
429 /* top */
430 sum = 0;
431 for (k = 0; k < 4; k++)
432 sum += c4[k][j] * block2[8 * (2 * k) + i];
433 block3[8 * (2 * j) + i] = sum;
434
435 /* bottom */
436 sum = 0;
437 for (k = 0; k < 4; k++)
438 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
439 block3[8 * (2 * j + 1) + i] = sum;
440 }
441 }
442
443 /* clamp and store the result */
444 for (i = 0; i < 8; i++) {
445 for (j = 0; j < 8; j++) {
446 v = block3[8 * i + j];
447 if (v < 0) v = 0;
448 else if (v > 255) v = 255;
449 dest[i * linesize + j] = (int) rint(v);
450 }
451 }
452 }
453
454 static void idct248_error(const char *name,
455 void (*idct248_put)(uint8_t *dest, int line_size,
456 int16_t *block),
457 int speed)
458 {
459 int it, i, it1, ti, ti1, err_max, v;
460 AVLFG prng;
461
462 av_lfg_init(&prng, 1);
463
464 /* just one test to see if code is correct (precision is less
465 important here) */
466 err_max = 0;
467 for (it = 0; it < NB_ITS; it++) {
468 /* XXX: use forward transform to generate values */
469 for (i = 0; i < 64; i++)
470 block1[i] = av_lfg_get(&prng) % 256 - 128;
471 block1[0] += 1024;
472
473 for (i = 0; i < 64; i++)
474 block[i] = block1[i];
475 idct248_ref(img_dest1, 8, block);
476
477 for (i = 0; i < 64; i++)
478 block[i] = block1[i];
479 idct248_put(img_dest, 8, block);
480
481 for (i = 0; i < 64; i++) {
482 v = abs((int) img_dest[i] - (int) img_dest1[i]);
483 if (v == 255)
484 printf("%d %d\n", img_dest[i], img_dest1[i]);
485 if (v > err_max)
486 err_max = v;
487 }
488 }
489 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
490
491 if (!speed)
492 return;
493
494 ti = gettime();
495 it1 = 0;
496 do {
497 for (it = 0; it < NB_ITS_SPEED; it++) {
498 for (i = 0; i < 64; i++)
499 block[i] = block1[i];
500 idct248_put(img_dest, 8, block);
501 }
502 it1 += NB_ITS_SPEED;
503 ti1 = gettime() - ti;
504 } while (ti1 < 1000000);
505 mmx_emms();
506
507 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
508 (double) it1 * 1000.0 / (double) ti1);
509 }
510
/* Print the command-line usage summary to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    printf("%s", usage);
}
521
522 int main(int argc, char **argv)
523 {
524 int test_idct = 0, test_248_dct = 0;
525 int c, i;
526 int test = 1;
527 int speed = 0;
528 int err = 0;
529
530 cpu_flags = av_get_cpu_flags();
531
532 ff_ref_dct_init();
533 idct_mmx_init();
534
535 for (i = 0; i < 256; i++)
536 cropTbl[i + MAX_NEG_CROP] = i;
537 for (i = 0; i < MAX_NEG_CROP; i++) {
538 cropTbl[i] = 0;
539 cropTbl[i + MAX_NEG_CROP + 256] = 255;
540 }
541
542 for (;;) {
543 c = getopt(argc, argv, "ih4t");
544 if (c == -1)
545 break;
546 switch (c) {
547 case 'i':
548 test_idct = 1;
549 break;
550 case '4':
551 test_248_dct = 1;
552 break;
553 case 't':
554 speed = 1;
555 break;
556 default:
557 case 'h':
558 help();
559 return 0;
560 }
561 }
562
563 if (optind < argc)
564 test = atoi(argv[optind]);
565
566 printf("ffmpeg DCT/IDCT test\n");
567
568 if (test_248_dct) {
569 idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
570 } else {
571 const struct algo *algos = test_idct ? idct_tab : fdct_tab;
572 for (i = 0; algos[i].name; i++)
573 if (!(~cpu_flags & algos[i].mm_support)) {
574 err |= dct_error(&algos[i], test, test_idct, speed);
575 }
576 }
577
578 return err;
579 }