dct-test: split table in two for idct and fdct
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
1/*
2 * (c) 2001 Fabrice Bellard
3ac35bdb 3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
04d7f601 4 *
2912e87a 5 * This file is part of Libav.
b78e7197 6 *
2912e87a 7 * Libav is free software; you can redistribute it and/or
04d7f601
DB
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
04d7f601 11 *
2912e87a 12 * Libav is distributed in the hope that it will be useful,
04d7f601
DB
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
2912e87a 18 * License along with Libav; if not, write to the Free Software
04d7f601
DB
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
983e3246 22/**
ba87f080 23 * @file
94f694a4 24 * DCT test (c) 2001 Fabrice Bellard
983e3246
MN
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
12807c8d 33#include <math.h>
de6d9b64 34
c6c98d08 35#include "libavutil/cpu.h"
ae32e509 36#include "libavutil/common.h"
294eaa26 37#include "libavutil/lfg.h"
de6d9b64 38
86748dbc 39#include "simple_idct.h"
10ac3618 40#include "aandcttab.h"
65e4c8c9 41#include "faandct.h"
6f08c541 42#include "faanidct.h"
a6493a8f 43#include "x86/idct_xvid.h"
6a813295 44#include "dctref.h"
9e1586fc 45
434df899
MN
46#undef printf
47
9686df2b
DB
48void ff_mmx_idct(DCTELEM *data);
49void ff_mmxext_idct(DCTELEM *data);
9e1586fc 50
9686df2b 51void odivx_idct_c(short *block);
86748dbc 52
3ac35bdb 53// BFIN
9686df2b
DB
54void ff_bfin_idct(DCTELEM *block);
55void ff_bfin_fdct(DCTELEM *block);
3ac35bdb
MH
56
57// ALTIVEC
9686df2b
DB
58void fdct_altivec(DCTELEM *block);
59//void idct_altivec(DCTELEM *block);?? no routine
3ac35bdb 60
479044ce 61// ARM
0926c009
MR
62void ff_j_rev_dct_arm(DCTELEM *data);
63void ff_simple_idct_arm(DCTELEM *data);
64void ff_simple_idct_armv5te(DCTELEM *data);
479044ce
MR
65void ff_simple_idct_armv6(DCTELEM *data);
66void ff_simple_idct_neon(DCTELEM *data);
3ac35bdb 67
2a839eeb
MR
68void ff_simple_idct_axp(DCTELEM *data);
69
3ac35bdb 70struct algo {
36fa9ef3
MR
71 const char *name;
72 enum { FDCT, IDCT } is_idct;
73 void (*func)(DCTELEM *block);
74 void (*ref) (DCTELEM *block);
75 enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
76 SSE2_PERM, PARTTRANS_PERM } format;
77 int mm_support;
3ac35bdb
MH
78};
79
80#ifndef FAAN_POSTSCALE
81#define FAAN_SCALE SCALE_PERM
82#else
83#define FAAN_SCALE NO_PERM
84#endif
85
aadd27cd
MN
86static int cpu_flags;
87
4b357756 88static const struct algo fdct_tab[] = {
36fa9ef3
MR
89 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
90 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
36fa9ef3
MR
91 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
92 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
3ac35bdb 93
b250f9c6 94#if HAVE_MMX
36fa9ef3 95 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX},
36fa9ef3
MR
96 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX2},
97 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_SSE2},
94254fc0 98#endif
3ac35bdb 99
4b357756
MR
100#if HAVE_ALTIVEC
101 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_ALTIVEC},
102#endif
103
104#if ARCH_BFIN
105 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
106#endif
107
108 { 0 }
109};
110
111static const struct algo idct_tab[] = {
112 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
113 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
114 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
115 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
116
117#if HAVE_MMX
b250f9c6 118#if CONFIG_GPL
36fa9ef3
MR
119 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX},
120 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX2},
b9702de5 121#endif
36fa9ef3
MR
122 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX},
123 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX},
124 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX2},
125 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, AV_CPU_FLAG_SSE2},
3ac35bdb
MH
126#endif
127
b250f9c6 128#if ARCH_BFIN
36fa9ef3 129 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
3ac35bdb
MH
130#endif
131
b250f9c6 132#if ARCH_ARM
36fa9ef3
MR
133 {"SIMPLE-ARM", 1, ff_simple_idct_arm, ff_ref_idct, NO_PERM },
134 {"INT-ARM", 1, ff_j_rev_dct_arm, ff_ref_idct, MMX_PERM },
4b357756 135#endif
b250f9c6 136#if HAVE_ARMV5TE
36fa9ef3 137 {"SIMPLE-ARMV5TE", 1, ff_simple_idct_armv5te, ff_ref_idct, NO_PERM },
479044ce 138#endif
b250f9c6 139#if HAVE_ARMV6
36fa9ef3 140 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
479044ce 141#endif
b250f9c6 142#if HAVE_NEON
36fa9ef3 143 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
479044ce 144#endif
479044ce 145
2a839eeb 146#if ARCH_ALPHA
36fa9ef3 147 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
2a839eeb
MR
148#endif
149
36fa9ef3 150 { 0 }
3ac35bdb
MH
151};
152
de6d9b64 153#define AANSCALE_BITS 12
de6d9b64 154
486497e0 155uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 156
504ffed1 157static int64_t gettime(void)
de6d9b64
FB
158{
159 struct timeval tv;
36fa9ef3 160 gettimeofday(&tv, NULL);
0c1a9eda 161 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
de6d9b64
FB
162}
163
164#define NB_ITS 20000
165#define NB_ITS_SPEED 50000
166
9e1586fc
FB
/* Input permutation for MMX_PERM implementations; filled by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM implementations. */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09,   0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19,   0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29,   0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B,   0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B,   0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39,   0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B,   0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B,   0x33, 0x3E, 0x37, 0x3F,
};

/* Position within a row that each input column moves to for SSE2_PERM. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };

/**
 * Build idct_mmx_perm[]: the row (bits 3-5 of the index) is kept, while
 * inside the row the lowest column bit moves to bit 2 and the two upper
 * column bits move down by one.
 */
static void idct_mmx_init(void)
{
    int pos;

    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
    for (pos = 0; pos < 64; pos++) {
        const int row = pos & 0x38;
        const int col = pos & 7;

        idct_mmx_perm[pos] = row | (col >> 1) | ((col & 1) << 2);
    }
}
191
c6727809 192DECLARE_ALIGNED(16, static DCTELEM, block)[64];
36fa9ef3
MR
193DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
194DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
9e1586fc 195
aadd27cd
MN
/**
 * Execute the x86 "emms" instruction when the build has MMX support and the
 * running CPU reports MMX; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & AV_CPU_FLAG_MMX))
        return;
    __asm__ volatile ("emms\n\t");
#endif
}
203
4f905a65 204static void dct_error(const struct algo *dct, int test)
de6d9b64
FB
205{
206 int it, i, scale;
de6d9b64 207 int err_inf, v;
0c1a9eda 208 int64_t err2, ti, ti1, it1;
36fa9ef3
MR
209 int64_t sysErr[64], sysErrMax = 0;
210 int maxout = 0;
211 int blockSumErrMax = 0, blockSumErr;
64bde197 212 AVLFG prng;
de6d9b64 213
64bde197 214 av_lfg_init(&prng, 1);
de6d9b64
FB
215
216 err_inf = 0;
217 err2 = 0;
36fa9ef3
MR
218 for (i = 0; i < 64; i++)
219 sysErr[i] = 0;
220 for (it = 0; it < NB_ITS; it++) {
221 for (i = 0; i < 64; i++)
86748dbc 222 block1[i] = 0;
36fa9ef3 223 switch (test) {
115329f1 224 case 0:
36fa9ef3
MR
225 for (i = 0; i < 64; i++)
226 block1[i] = (av_lfg_get(&prng) % 512) - 256;
4f905a65 227 if (dct->is_idct) {
0de74546 228 ff_ref_fdct(block1);
36fa9ef3
MR
229 for (i = 0; i < 64; i++)
230 block1[i] >>= 3;
ad324c93 231 }
36fa9ef3
MR
232 break;
233 case 1: {
234 int num = av_lfg_get(&prng) % 10 + 1;
235 for (i = 0; i < num; i++)
236 block1[av_lfg_get(&prng) % 64] =
237 av_lfg_get(&prng) % 512 - 256;
238 }
239 break;
86748dbc 240 case 2:
64bde197 241 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
36fa9ef3
MR
242 block1[63] = (block1[0] & 1) ^ 1;
243 break;
86748dbc 244 }
9e1586fc 245
36fa9ef3
MR
246 for (i = 0; i < 64; i++)
247 block_org[i] = block1[i];
9e1586fc 248
4f905a65 249 if (dct->format == MMX_PERM) {
36fa9ef3 250 for (i = 0; i < 64; i++)
9e1586fc 251 block[idct_mmx_perm[i]] = block1[i];
4f905a65 252 } else if (dct->format == MMX_SIMPLE_PERM) {
36fa9ef3 253 for (i = 0; i < 64; i++)
86748dbc 254 block[idct_simple_mmx_perm[i]] = block1[i];
4f905a65 255 } else if (dct->format == SSE2_PERM) {
36fa9ef3
MR
256 for (i = 0; i < 64; i++)
257 block[(i & 0x38) | idct_sse2_row_perm[i & 7]] = block1[i];
4f905a65 258 } else if (dct->format == PARTTRANS_PERM) {
36fa9ef3
MR
259 for (i = 0; i < 64; i++)
260 block[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = block1[i];
bb270c08 261 } else {
36fa9ef3
MR
262 for (i = 0; i < 64; i++)
263 block[i] = block1[i];
9e1586fc
FB
264 }
265
4f905a65 266 dct->func(block);
aadd27cd 267 mmx_emms();
9e1586fc 268
4f905a65 269 if (dct->format == SCALE_PERM) {
36fa9ef3
MR
270 for (i = 0; i < 64; i++) {
271 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
272 block[i] = (block[i] * scale) >> AANSCALE_BITS;
86748dbc
MN
273 }
274 }
275
4f905a65 276 dct->ref(block1);
de6d9b64 277
36fa9ef3
MR
278 blockSumErr = 0;
279 for (i = 0; i < 64; i++) {
de6d9b64
FB
280 v = abs(block[i] - block1[i]);
281 if (v > err_inf)
282 err_inf = v;
283 err2 += v * v;
bb270c08
DB
284 sysErr[i] += block[i] - block1[i];
285 blockSumErr += v;
36fa9ef3
MR
286 if (abs(block[i]) > maxout)
287 maxout = abs(block[i]);
de6d9b64 288 }
36fa9ef3
MR
289 if (blockSumErrMax < blockSumErr)
290 blockSumErrMax = blockSumErr;
86748dbc 291 }
36fa9ef3
MR
292 for (i = 0; i < 64; i++)
293 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
115329f1 294
36fa9ef3
MR
295 for (i = 0; i < 64; i++) {
296 if (i % 8 == 0)
297 printf("\n");
298 printf("%7d ", (int) sysErr[i]);
de6d9b64 299 }
86748dbc 300 printf("\n");
115329f1 301
86748dbc 302 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
4f905a65 303 dct->is_idct ? "IDCT" : "DCT", dct->name, err_inf,
36fa9ef3
MR
304 (double) err2 / NB_ITS / 64.0, (double) sysErrMax / NB_ITS,
305 maxout, blockSumErrMax);
e6ff0648 306
de6d9b64 307 /* speed test */
36fa9ef3 308 for (i = 0; i < 64; i++)
86748dbc 309 block1[i] = 0;
36fa9ef3
MR
310
311 switch (test) {
115329f1 312 case 0:
36fa9ef3
MR
313 for (i = 0; i < 64; i++)
314 block1[i] = av_lfg_get(&prng) % 512 - 256;
4f905a65 315 if (dct->is_idct) {
0de74546 316 ff_ref_fdct(block1);
36fa9ef3
MR
317 for (i = 0; i < 64; i++)
318 block1[i] >>= 3;
ad324c93 319 }
36fa9ef3
MR
320 break;
321 case 1:
86748dbc 322 case 2:
36fa9ef3
MR
323 block1[0] = av_lfg_get(&prng) % 512 - 256;
324 block1[1] = av_lfg_get(&prng) % 512 - 256;
325 block1[2] = av_lfg_get(&prng) % 512 - 256;
326 block1[3] = av_lfg_get(&prng) % 512 - 256;
327 break;
86748dbc 328 }
de6d9b64 329
4f905a65 330 if (dct->format == MMX_PERM) {
36fa9ef3 331 for (i = 0; i < 64; i++)
9e1586fc 332 block[idct_mmx_perm[i]] = block1[i];
4f905a65 333 } else if (dct->format == MMX_SIMPLE_PERM) {
36fa9ef3 334 for (i = 0; i < 64; i++)
86748dbc
MN
335 block[idct_simple_mmx_perm[i]] = block1[i];
336 } else {
36fa9ef3
MR
337 for (i = 0; i < 64; i++)
338 block[i] = block1[i];
9e1586fc
FB
339 }
340
de6d9b64
FB
341 ti = gettime();
342 it1 = 0;
343 do {
36fa9ef3
MR
344 for (it = 0; it < NB_ITS_SPEED; it++) {
345 for (i = 0; i < 64; i++)
346 block[i] = block1[i];
4f905a65 347 dct->func(block);
de6d9b64
FB
348 }
349 it1 += NB_ITS_SPEED;
350 ti1 = gettime() - ti;
351 } while (ti1 < 1000000);
aadd27cd 352 mmx_emms();
de6d9b64 353
4f905a65 354 printf("%s %s: %0.1f kdct/s\n", dct->is_idct ? "IDCT" : "DCT", dct->name,
36fa9ef3 355 (double) it1 * 1000.0 / (double) ti1);
de6d9b64
FB
356}
357
c6727809
MR
358DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
359DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
a46a3ce4 360
504ffed1 361static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
362{
363 static int init;
364 static double c8[8][8];
365 static double c4[4][4];
366 double block1[64], block2[64], block3[64];
367 double s, sum, v;
368 int i, j, k;
369
370 if (!init) {
371 init = 1;
372
36fa9ef3 373 for (i = 0; i < 8; i++) {
a46a3ce4 374 sum = 0;
36fa9ef3
MR
375 for (j = 0; j < 8; j++) {
376 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
a46a3ce4
FB
377 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
378 sum += c8[i][j] * c8[i][j];
379 }
380 }
115329f1 381
36fa9ef3 382 for (i = 0; i < 4; i++) {
a46a3ce4 383 sum = 0;
36fa9ef3
MR
384 for (j = 0; j < 4; j++) {
385 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
a46a3ce4
FB
386 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
387 sum += c4[i][j] * c4[i][j];
388 }
389 }
390 }
391
392 /* butterfly */
652f0197 393 s = 0.5 * sqrt(2.0);
36fa9ef3
MR
394 for (i = 0; i < 4; i++) {
395 for (j = 0; j < 8; j++) {
396 block1[8 * (2 * i) + j] =
397 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
398 block1[8 * (2 * i + 1) + j] =
399 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
a46a3ce4
FB
400 }
401 }
402
403 /* idct8 on lines */
36fa9ef3
MR
404 for (i = 0; i < 8; i++) {
405 for (j = 0; j < 8; j++) {
a46a3ce4 406 sum = 0;
36fa9ef3
MR
407 for (k = 0; k < 8; k++)
408 sum += c8[k][j] * block1[8 * i + k];
409 block2[8 * i + j] = sum;
a46a3ce4
FB
410 }
411 }
412
413 /* idct4 */
36fa9ef3
MR
414 for (i = 0; i < 8; i++) {
415 for (j = 0; j < 4; j++) {
a46a3ce4
FB
416 /* top */
417 sum = 0;
36fa9ef3
MR
418 for (k = 0; k < 4; k++)
419 sum += c4[k][j] * block2[8 * (2 * k) + i];
420 block3[8 * (2 * j) + i] = sum;
a46a3ce4
FB
421
422 /* bottom */
423 sum = 0;
36fa9ef3
MR
424 for (k = 0; k < 4; k++)
425 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
426 block3[8 * (2 * j + 1) + i] = sum;
a46a3ce4
FB
427 }
428 }
429
430 /* clamp and store the result */
36fa9ef3
MR
431 for (i = 0; i < 8; i++) {
432 for (j = 0; j < 8; j++) {
433 v = block3[8 * i + j];
434 if (v < 0) v = 0;
435 else if (v > 255) v = 255;
436 dest[i * linesize + j] = (int) rint(v);
a46a3ce4
FB
437 }
438 }
439}
440
504ffed1 441static void idct248_error(const char *name,
36fa9ef3
MR
442 void (*idct248_put)(uint8_t *dest, int line_size,
443 int16_t *block))
a46a3ce4
FB
444{
445 int it, i, it1, ti, ti1, err_max, v;
64bde197 446 AVLFG prng;
294eaa26 447
64bde197 448 av_lfg_init(&prng, 1);
115329f1 449
a46a3ce4
FB
450 /* just one test to see if code is correct (precision is less
451 important here) */
452 err_max = 0;
36fa9ef3 453 for (it = 0; it < NB_ITS; it++) {
652f0197 454 /* XXX: use forward transform to generate values */
36fa9ef3 455 for (i = 0; i < 64; i++)
64bde197 456 block1[i] = av_lfg_get(&prng) % 256 - 128;
652f0197
FB
457 block1[0] += 1024;
458
36fa9ef3
MR
459 for (i = 0; i < 64; i++)
460 block[i] = block1[i];
a46a3ce4 461 idct248_ref(img_dest1, 8, block);
115329f1 462
36fa9ef3
MR
463 for (i = 0; i < 64; i++)
464 block[i] = block1[i];
652f0197 465 idct248_put(img_dest, 8, block);
115329f1 466
36fa9ef3
MR
467 for (i = 0; i < 64; i++) {
468 v = abs((int) img_dest[i] - (int) img_dest1[i]);
652f0197
FB
469 if (v == 255)
470 printf("%d %d\n", img_dest[i], img_dest1[i]);
471 if (v > err_max)
472 err_max = v;
473 }
a46a3ce4 474 }
36fa9ef3 475 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
a46a3ce4
FB
476
477 ti = gettime();
478 it1 = 0;
479 do {
36fa9ef3
MR
480 for (it = 0; it < NB_ITS_SPEED; it++) {
481 for (i = 0; i < 64; i++)
482 block[i] = block1[i];
a46a3ce4
FB
483 idct248_put(img_dest, 8, block);
484 }
485 it1 += NB_ITS_SPEED;
486 ti1 = gettime() - ti;
487 } while (ti1 < 1000000);
aadd27cd 488 mmx_emms();
a46a3ce4 489
36fa9ef3
MR
490 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
491 (double) it1 * 1000.0 / (double) ti1);
a46a3ce4
FB
492}
493
/**
 * Print the command line synopsis and the meaning of every option and test
 * number to stdout.
 */
static void help(void)
{
    printf("dct-test [-i] [-4] [-h] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-h          print this help\n");
}
503
de6d9b64
FB
504int main(int argc, char **argv)
505{
a46a3ce4 506 int test_idct = 0, test_248_dct = 0;
36fa9ef3
MR
507 int c, i;
508 int test = 1;
509
c6c98d08 510 cpu_flags = av_get_cpu_flags();
9e1586fc 511
0de74546 512 ff_ref_dct_init();
9e1586fc 513 idct_mmx_init();
f67a10cd 514
36fa9ef3
MR
515 for (i = 0; i < 256; i++)
516 cropTbl[i + MAX_NEG_CROP] = i;
517 for (i = 0; i < MAX_NEG_CROP; i++) {
486497e0
MR
518 cropTbl[i] = 0;
519 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 520 }
115329f1 521
36fa9ef3 522 for (;;) {
a46a3ce4 523 c = getopt(argc, argv, "ih4");
9e1586fc
FB
524 if (c == -1)
525 break;
36fa9ef3 526 switch (c) {
9e1586fc
FB
527 case 'i':
528 test_idct = 1;
529 break;
a46a3ce4
FB
530 case '4':
531 test_248_dct = 1;
532 break;
36fa9ef3 533 default:
9e1586fc
FB
534 case 'h':
535 help();
c6bdc908 536 return 0;
9e1586fc
FB
537 }
538 }
115329f1 539
36fa9ef3
MR
540 if (optind < argc)
541 test = atoi(argv[optind]);
115329f1 542
9e1586fc
FB
543 printf("ffmpeg DCT/IDCT test\n");
544
a46a3ce4 545 if (test_248_dct) {
59e6f60a 546 idct248_error("SIMPLE-C", ff_simple_idct248_put);
9e1586fc 547 } else {
4b357756 548 const struct algo *algos = test_idct ? idct_tab : fdct_tab;
36fa9ef3 549 for (i = 0; algos[i].name; i++)
4b357756 550 if (!(~cpu_flags & algos[i].mm_support)) {
4f905a65 551 dct_error(&algos[i], test);
36fa9ef3 552 }
9e1586fc 553 }
de6d9b64
FB
554 return 0;
555}