d1393c45847f588c7c2f039b8638d4d550dd4586
[libav.git] / libavcodec / dct-test.c
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of Libav.
6 *
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
41
42 #include "dct.h"
43 #include "idctdsp.h"
44 #include "simple_idct.h"
45 #include "aandcttab.h"
46 #include "faandct.h"
47 #include "faanidct.h"
48 #include "arm/idct.h"
49 #include "ppc/fdct.h"
50 #include "x86/fdct.h"
51 #include "x86/idct_xvid.h"
52 #include "x86/simple_idct.h"
53 #include "dctref.h"
54
55 struct algo {
56 const char *name;
57 void (*func)(int16_t *block);
58 enum idct_permutation_type perm_type;
59 int cpu_flag;
60 int nonspec;
61 };
62
63 static const struct algo fdct_tab[] = {
64 { "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE },
65 { "FAAN", ff_faandct, FF_IDCT_PERM_NONE },
66 { "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE },
67 { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
68
69 #if HAVE_MMX_INLINE
70 { "MMX", ff_fdct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
71 #endif
72 #if HAVE_MMXEXT_INLINE
73 { "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT },
74 #endif
75 #if HAVE_SSE2_INLINE
76 { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 },
77 #endif
78
79 #if HAVE_ALTIVEC
80 { "altivecfdct", ff_fdct_altivec, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ALTIVEC },
81 #endif
82
83 { 0 }
84 };
85
86 static const struct algo idct_tab[] = {
87 { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE },
88 { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE },
89 { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 },
90 { "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE },
91
92 #if HAVE_MMX_INLINE
93 { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
94 { "XVID-MMX", ff_idct_xvid_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX, 1 },
95 #endif
96 #if HAVE_MMXEXT_INLINE
97 { "XVID-MMXEXT", ff_idct_xvid_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 },
98 #endif
99 #if HAVE_SSE2_INLINE
100 { "XVID-SSE2", ff_idct_xvid_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
101 #endif
102
103 #if ARCH_ARM
104 { "SIMPLE-ARM", ff_simple_idct_arm, FF_IDCT_PERM_NONE },
105 { "INT-ARM", ff_j_rev_dct_arm, FF_IDCT_PERM_LIBMPEG2 },
106 #endif
107 #if HAVE_ARMV5TE
108 { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ARMV5TE },
109 #endif
110 #if HAVE_ARMV6
111 { "SIMPLE-ARMV6", ff_simple_idct_armv6, FF_IDCT_PERM_LIBMPEG2, AV_CPU_FLAG_ARMV6 },
112 #endif
113 #if HAVE_NEON && ARCH_ARM
114 { "SIMPLE-NEON", ff_simple_idct_neon, FF_IDCT_PERM_PARTTRANS, AV_CPU_FLAG_NEON },
115 #endif
116
117 { 0 }
118 };
119
enum {
    /* fixed-point shift used when dct_error() rescales IJG-AAN-INT output */
    AANSCALE_BITS = 12,

    /* number of generated blocks per accuracy measurement */
    NB_ITS        = 20000,
    /* transform calls per timing batch, i.e. between two clock reads */
    NB_ITS_SPEED  = 50000,
};
124
/*
 * Coefficient placement for FF_IDCT_PERM_SIMPLE: permute() stores source
 * coefficient i at destination index idct_simple_mmx_perm[i].
 * The table is only ever read, so it is const like the other lookup table
 * in this file.
 */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
135
/*
 * Column reordering for FF_IDCT_PERM_SSE2: within every row, permute()
 * moves column c to column idct_sse2_row_perm[c].
 */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5,
    2, 6, 3, 7,
};
137
138 DECLARE_ALIGNED(16, static int16_t, block)[64];
139 DECLARE_ALIGNED(8, static int16_t, block1)[64];
140
141 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
142 {
143 int i, j;
144
145 memset(block, 0, 64 * sizeof(*block));
146
147 switch (test) {
148 case 0:
149 for (i = 0; i < 64; i++)
150 block[i] = (av_lfg_get(prng) % 512) - 256;
151 if (is_idct) {
152 ff_ref_fdct(block);
153 for (i = 0; i < 64; i++)
154 block[i] >>= 3;
155 }
156 break;
157 case 1:
158 j = av_lfg_get(prng) % 10 + 1;
159 for (i = 0; i < j; i++)
160 block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
161 break;
162 case 2:
163 block[ 0] = av_lfg_get(prng) % 4096 - 2048;
164 block[63] = (block[0] & 1) ^ 1;
165 break;
166 }
167 }
168
169 static void permute(int16_t dst[64], const int16_t src[64],
170 enum idct_permutation_type perm_type)
171 {
172 int i;
173
174 switch (perm_type) {
175 case FF_IDCT_PERM_LIBMPEG2:
176 for (i = 0; i < 64; i++)
177 dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
178 break;
179 case FF_IDCT_PERM_SIMPLE:
180 for (i = 0; i < 64; i++)
181 dst[idct_simple_mmx_perm[i]] = src[i];
182 break;
183 case FF_IDCT_PERM_SSE2:
184 for (i = 0; i < 64; i++)
185 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
186 break;
187 case FF_IDCT_PERM_PARTTRANS:
188 for (i = 0; i < 64; i++)
189 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
190 break;
191 default:
192 for (i = 0; i < 64; i++)
193 dst[i] = src[i];
194 break;
195 }
196 }
197
198 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
199 {
200 void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
201 int it, i, scale;
202 int err_inf, v;
203 int64_t err2, ti, ti1, it1, err_sum = 0;
204 int64_t sysErr[64], sysErrMax = 0;
205 int maxout = 0;
206 int blockSumErrMax = 0, blockSumErr;
207 AVLFG prng;
208 double omse, ome;
209 int spec_err;
210
211 av_lfg_init(&prng, 1);
212
213 err_inf = 0;
214 err2 = 0;
215 for (i = 0; i < 64; i++)
216 sysErr[i] = 0;
217 for (it = 0; it < NB_ITS; it++) {
218 init_block(block1, test, is_idct, &prng);
219 permute(block, block1, dct->perm_type);
220
221 dct->func(block);
222 emms_c();
223
224 if (!strcmp(dct->name, "IJG-AAN-INT")) {
225 for (i = 0; i < 64; i++) {
226 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
227 block[i] = (block[i] * scale) >> AANSCALE_BITS;
228 }
229 }
230
231 ref(block1);
232
233 blockSumErr = 0;
234 for (i = 0; i < 64; i++) {
235 int err = block[i] - block1[i];
236 err_sum += err;
237 v = abs(err);
238 if (v > err_inf)
239 err_inf = v;
240 err2 += v * v;
241 sysErr[i] += block[i] - block1[i];
242 blockSumErr += v;
243 if (abs(block[i]) > maxout)
244 maxout = abs(block[i]);
245 }
246 if (blockSumErrMax < blockSumErr)
247 blockSumErrMax = blockSumErr;
248 }
249 for (i = 0; i < 64; i++)
250 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
251
252 for (i = 0; i < 64; i++) {
253 if (i % 8 == 0)
254 printf("\n");
255 printf("%7d ", (int) sysErr[i]);
256 }
257 printf("\n");
258
259 omse = (double) err2 / NB_ITS / 64;
260 ome = (double) err_sum / NB_ITS / 64;
261
262 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
263
264 printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
265 is_idct ? "IDCT" : "DCT", dct->name, err_inf,
266 omse, ome, (double) sysErrMax / NB_ITS,
267 maxout, blockSumErrMax);
268
269 if (spec_err && !dct->nonspec)
270 return 1;
271
272 if (!speed)
273 return 0;
274
275 /* speed test */
276 init_block(block, test, is_idct, &prng);
277 permute(block1, block, dct->perm_type);
278
279 ti = av_gettime();
280 it1 = 0;
281 do {
282 for (it = 0; it < NB_ITS_SPEED; it++) {
283 memcpy(block, block1, sizeof(block));
284 dct->func(block);
285 }
286 it1 += NB_ITS_SPEED;
287 ti1 = av_gettime() - ti;
288 } while (ti1 < 1000000);
289 emms_c();
290
291 printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
292 (double) it1 * 1000.0 / (double) ti1);
293
294 return 0;
295 }
296
297 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
298 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
299
300 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
301 {
302 static int init;
303 static double c8[8][8];
304 static double c4[4][4];
305 double block1[64], block2[64], block3[64];
306 double s, sum, v;
307 int i, j, k;
308
309 if (!init) {
310 init = 1;
311
312 for (i = 0; i < 8; i++) {
313 sum = 0;
314 for (j = 0; j < 8; j++) {
315 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
316 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
317 sum += c8[i][j] * c8[i][j];
318 }
319 }
320
321 for (i = 0; i < 4; i++) {
322 sum = 0;
323 for (j = 0; j < 4; j++) {
324 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
325 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
326 sum += c4[i][j] * c4[i][j];
327 }
328 }
329 }
330
331 /* butterfly */
332 s = 0.5 * sqrt(2.0);
333 for (i = 0; i < 4; i++) {
334 for (j = 0; j < 8; j++) {
335 block1[8 * (2 * i) + j] =
336 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
337 block1[8 * (2 * i + 1) + j] =
338 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
339 }
340 }
341
342 /* idct8 on lines */
343 for (i = 0; i < 8; i++) {
344 for (j = 0; j < 8; j++) {
345 sum = 0;
346 for (k = 0; k < 8; k++)
347 sum += c8[k][j] * block1[8 * i + k];
348 block2[8 * i + j] = sum;
349 }
350 }
351
352 /* idct4 */
353 for (i = 0; i < 8; i++) {
354 for (j = 0; j < 4; j++) {
355 /* top */
356 sum = 0;
357 for (k = 0; k < 4; k++)
358 sum += c4[k][j] * block2[8 * (2 * k) + i];
359 block3[8 * (2 * j) + i] = sum;
360
361 /* bottom */
362 sum = 0;
363 for (k = 0; k < 4; k++)
364 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
365 block3[8 * (2 * j + 1) + i] = sum;
366 }
367 }
368
369 /* clamp and store the result */
370 for (i = 0; i < 8; i++) {
371 for (j = 0; j < 8; j++) {
372 v = block3[8 * i + j];
373 if (v < 0) v = 0;
374 else if (v > 255) v = 255;
375 dest[i * linesize + j] = (int) rint(v);
376 }
377 }
378 }
379
380 static void idct248_error(const char *name,
381 void (*idct248_put)(uint8_t *dest, int line_size,
382 int16_t *block),
383 int speed)
384 {
385 int it, i, it1, ti, ti1, err_max, v;
386 AVLFG prng;
387
388 av_lfg_init(&prng, 1);
389
390 /* just one test to see if code is correct (precision is less
391 important here) */
392 err_max = 0;
393 for (it = 0; it < NB_ITS; it++) {
394 /* XXX: use forward transform to generate values */
395 for (i = 0; i < 64; i++)
396 block1[i] = av_lfg_get(&prng) % 256 - 128;
397 block1[0] += 1024;
398
399 for (i = 0; i < 64; i++)
400 block[i] = block1[i];
401 idct248_ref(img_dest1, 8, block);
402
403 for (i = 0; i < 64; i++)
404 block[i] = block1[i];
405 idct248_put(img_dest, 8, block);
406
407 for (i = 0; i < 64; i++) {
408 v = abs((int) img_dest[i] - (int) img_dest1[i]);
409 if (v == 255)
410 printf("%d %d\n", img_dest[i], img_dest1[i]);
411 if (v > err_max)
412 err_max = v;
413 }
414 }
415 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
416
417 if (!speed)
418 return;
419
420 ti = av_gettime();
421 it1 = 0;
422 do {
423 for (it = 0; it < NB_ITS_SPEED; it++) {
424 for (i = 0; i < 64; i++)
425 block[i] = block1[i];
426 idct248_put(img_dest, 8, block);
427 }
428 it1 += NB_ITS_SPEED;
429 ti1 = av_gettime() - ti;
430 } while (ti1 < 1000000);
431 emms_c();
432
433 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
434 (double) it1 * 1000.0 / (double) ti1);
435 }
436
/* Print the command-line usage summary to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    fputs(usage, stdout);
}
447
448 #if !HAVE_GETOPT
449 #include "compat/getopt.c"
450 #endif
451
452 int main(int argc, char **argv)
453 {
454 int test_idct = 0, test_248_dct = 0;
455 int c, i;
456 int test = 1;
457 int speed = 0;
458 int err = 0;
459
460 ff_ref_dct_init();
461
462 for (;;) {
463 c = getopt(argc, argv, "ih4t");
464 if (c == -1)
465 break;
466 switch (c) {
467 case 'i':
468 test_idct = 1;
469 break;
470 case '4':
471 test_248_dct = 1;
472 break;
473 case 't':
474 speed = 1;
475 break;
476 default:
477 case 'h':
478 help();
479 return 0;
480 }
481 }
482
483 if (optind < argc)
484 test = atoi(argv[optind]);
485
486 printf("Libav DCT/IDCT test\n");
487
488 if (test_248_dct) {
489 idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
490 } else {
491 const int cpu_flags = av_get_cpu_flags();
492 const struct algo *algos = test_idct ? idct_tab : fdct_tab;
493 for (i = 0; algos[i].name; i++)
494 if (!(~cpu_flags & algos[i].cpu_flag)) {
495 err |= dct_error(&algos[i], test, test_idct, speed);
496 }
497 }
498
499 if (err)
500 printf("Error: %d.\n", err);
501
502 return !!err;
503 }