build: Remove deleted 'check' target from .PHONY list.
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
/**
 * @file
 * DCT test (c) 2001 Fabrice Bellard
 * Started from sample code by Juan J. Sierralta P.
 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
12807c8d 33#include <math.h>
de6d9b64 34
c6c98d08 35#include "libavutil/cpu.h"
ae32e509 36#include "libavutil/common.h"
294eaa26 37#include "libavutil/lfg.h"
de6d9b64 38
86748dbc 39#include "simple_idct.h"
10ac3618 40#include "aandcttab.h"
65e4c8c9 41#include "faandct.h"
6f08c541 42#include "faanidct.h"
a6493a8f 43#include "x86/idct_xvid.h"
6a813295 44#include "dctref.h"
9e1586fc 45
434df899
MN
46#undef printf
47
9686df2b
DB
48void ff_mmx_idct(DCTELEM *data);
49void ff_mmxext_idct(DCTELEM *data);
9e1586fc 50
9686df2b 51void odivx_idct_c(short *block);
86748dbc 52
3ac35bdb 53// BFIN
9686df2b
DB
54void ff_bfin_idct(DCTELEM *block);
55void ff_bfin_fdct(DCTELEM *block);
3ac35bdb
MH
56
57// ALTIVEC
9686df2b
DB
58void fdct_altivec(DCTELEM *block);
59//void idct_altivec(DCTELEM *block);?? no routine
3ac35bdb 60
479044ce 61// ARM
0926c009
MR
62void ff_j_rev_dct_arm(DCTELEM *data);
63void ff_simple_idct_arm(DCTELEM *data);
64void ff_simple_idct_armv5te(DCTELEM *data);
479044ce
MR
65void ff_simple_idct_armv6(DCTELEM *data);
66void ff_simple_idct_neon(DCTELEM *data);
3ac35bdb 67
2a839eeb
MR
68void ff_simple_idct_axp(DCTELEM *data);
69
3ac35bdb 70struct algo {
36fa9ef3 71 const char *name;
36fa9ef3
MR
72 void (*func)(DCTELEM *block);
73 void (*ref) (DCTELEM *block);
74 enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
75 SSE2_PERM, PARTTRANS_PERM } format;
76 int mm_support;
dbf396d4 77 int nonspec;
3ac35bdb
MH
78};
79
80#ifndef FAAN_POSTSCALE
81#define FAAN_SCALE SCALE_PERM
82#else
83#define FAAN_SCALE NO_PERM
84#endif
85
aadd27cd
MN
86static int cpu_flags;
87
4b357756 88static const struct algo fdct_tab[] = {
cf2b4f88
MR
89 {"REF-DBL", ff_ref_fdct, ff_ref_fdct, NO_PERM},
90 {"FAAN", ff_faandct, ff_ref_fdct, FAAN_SCALE},
91 {"IJG-AAN-INT", fdct_ifast, ff_ref_fdct, SCALE_PERM},
92 {"IJG-LLM-INT", ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
3ac35bdb 93
b250f9c6 94#if HAVE_MMX
cf2b4f88
MR
95 {"MMX", ff_fdct_mmx, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX},
96 {"MMX2", ff_fdct_mmx2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX2},
97 {"SSE2", ff_fdct_sse2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_SSE2},
94254fc0 98#endif
3ac35bdb 99
4b357756 100#if HAVE_ALTIVEC
cf2b4f88 101 {"altivecfdct", fdct_altivec, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_ALTIVEC},
4b357756
MR
102#endif
103
104#if ARCH_BFIN
cf2b4f88 105 {"BFINfdct", ff_bfin_fdct, ff_ref_fdct, NO_PERM},
4b357756
MR
106#endif
107
108 { 0 }
109};
110
111static const struct algo idct_tab[] = {
cf2b4f88
MR
112 {"FAANI", ff_faanidct, ff_ref_idct, NO_PERM},
113 {"REF-DBL", ff_ref_idct, ff_ref_idct, NO_PERM},
114 {"INT", j_rev_dct, ff_ref_idct, MMX_PERM},
115 {"SIMPLE-C", ff_simple_idct, ff_ref_idct, NO_PERM},
4b357756
MR
116
117#if HAVE_MMX
b250f9c6 118#if CONFIG_GPL
dbf396d4
MR
119 {"LIBMPEG2-MMX", ff_mmx_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1},
120 {"LIBMPEG2-MMX2", ff_mmxext_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1},
b9702de5 121#endif
cf2b4f88 122 {"SIMPLE-MMX", ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX},
dbf396d4
MR
123 {"XVID-MMX", ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX, 1},
124 {"XVID-MMX2", ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX2, 1},
125 {"XVID-SSE2", ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, AV_CPU_FLAG_SSE2, 1},
3ac35bdb
MH
126#endif
127
b250f9c6 128#if ARCH_BFIN
cf2b4f88 129 {"BFINidct", ff_bfin_idct, ff_ref_idct, NO_PERM},
3ac35bdb
MH
130#endif
131
b250f9c6 132#if ARCH_ARM
cf2b4f88
MR
133 {"SIMPLE-ARM", ff_simple_idct_arm, ff_ref_idct, NO_PERM },
134 {"INT-ARM", ff_j_rev_dct_arm, ff_ref_idct, MMX_PERM },
4b357756 135#endif
b250f9c6 136#if HAVE_ARMV5TE
cf2b4f88 137 {"SIMPLE-ARMV5TE", ff_simple_idct_armv5te, ff_ref_idct, NO_PERM },
479044ce 138#endif
b250f9c6 139#if HAVE_ARMV6
cf2b4f88 140 {"SIMPLE-ARMV6", ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
479044ce 141#endif
b250f9c6 142#if HAVE_NEON
cf2b4f88 143 {"SIMPLE-NEON", ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
479044ce 144#endif
479044ce 145
2a839eeb 146#if ARCH_ALPHA
cf2b4f88 147 {"SIMPLE-ALPHA", ff_simple_idct_axp, ff_ref_idct, NO_PERM },
2a839eeb
MR
148#endif
149
36fa9ef3 150 { 0 }
3ac35bdb
MH
151};
152
de6d9b64 153#define AANSCALE_BITS 12
de6d9b64 154
486497e0 155uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 156
/**
 * Read the wall clock through gettimeofday().
 *
 * @return the current time in microseconds
 */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen before multiplying so the seconds part cannot overflow */
    usec  = (int64_t) now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
163
164#define NB_ITS 20000
165#define NB_ITS_SPEED 50000
166
9e1586fc
FB
/* Input permutation for MMX_PERM transforms; filled in by idct_mmx_init(). */
static short idct_mmx_perm[64];

/*
 * Input permutation for MMX_SIMPLE_PERM transforms: natural-order
 * coefficient i is stored at position idct_simple_mmx_perm[i].
 */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
    0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
    0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
    0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
    0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};

/* Column reordering applied within each row for SSE2_PERM transforms. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
ad246860 181
504ffed1 182static void idct_mmx_init(void)
9e1586fc
FB
183{
184 int i;
185
186 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
187 for (i = 0; i < 64; i++) {
bb270c08 188 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
9e1586fc
FB
189 }
190}
191
c6727809 192DECLARE_ALIGNED(16, static DCTELEM, block)[64];
36fa9ef3
MR
193DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
194DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
9e1586fc 195
aadd27cd
MN
/**
 * Execute the x86 "emms" instruction when the build has MMX support and the
 * running CPU reports AV_CPU_FLAG_MMX; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & AV_CPU_FLAG_MMX))
        return;

    __asm__ volatile ("emms\n\t");
#endif
}
203
dbf396d4 204static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
de6d9b64
FB
205{
206 int it, i, scale;
de6d9b64 207 int err_inf, v;
dbf396d4 208 int64_t err2, ti, ti1, it1, err_sum = 0;
36fa9ef3
MR
209 int64_t sysErr[64], sysErrMax = 0;
210 int maxout = 0;
211 int blockSumErrMax = 0, blockSumErr;
64bde197 212 AVLFG prng;
dbf396d4
MR
213 double omse, ome;
214 int spec_err;
de6d9b64 215
64bde197 216 av_lfg_init(&prng, 1);
de6d9b64
FB
217
218 err_inf = 0;
219 err2 = 0;
36fa9ef3
MR
220 for (i = 0; i < 64; i++)
221 sysErr[i] = 0;
222 for (it = 0; it < NB_ITS; it++) {
223 for (i = 0; i < 64; i++)
86748dbc 224 block1[i] = 0;
36fa9ef3 225 switch (test) {
115329f1 226 case 0:
36fa9ef3
MR
227 for (i = 0; i < 64; i++)
228 block1[i] = (av_lfg_get(&prng) % 512) - 256;
cf2b4f88 229 if (is_idct) {
0de74546 230 ff_ref_fdct(block1);
36fa9ef3
MR
231 for (i = 0; i < 64; i++)
232 block1[i] >>= 3;
ad324c93 233 }
36fa9ef3
MR
234 break;
235 case 1: {
236 int num = av_lfg_get(&prng) % 10 + 1;
237 for (i = 0; i < num; i++)
238 block1[av_lfg_get(&prng) % 64] =
239 av_lfg_get(&prng) % 512 - 256;
240 }
241 break;
86748dbc 242 case 2:
64bde197 243 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
36fa9ef3
MR
244 block1[63] = (block1[0] & 1) ^ 1;
245 break;
86748dbc 246 }
9e1586fc 247
36fa9ef3
MR
248 for (i = 0; i < 64; i++)
249 block_org[i] = block1[i];
9e1586fc 250
4f905a65 251 if (dct->format == MMX_PERM) {
36fa9ef3 252 for (i = 0; i < 64; i++)
9e1586fc 253 block[idct_mmx_perm[i]] = block1[i];
4f905a65 254 } else if (dct->format == MMX_SIMPLE_PERM) {
36fa9ef3 255 for (i = 0; i < 64; i++)
86748dbc 256 block[idct_simple_mmx_perm[i]] = block1[i];
4f905a65 257 } else if (dct->format == SSE2_PERM) {
36fa9ef3
MR
258 for (i = 0; i < 64; i++)
259 block[(i & 0x38) | idct_sse2_row_perm[i & 7]] = block1[i];
4f905a65 260 } else if (dct->format == PARTTRANS_PERM) {
36fa9ef3
MR
261 for (i = 0; i < 64; i++)
262 block[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = block1[i];
bb270c08 263 } else {
36fa9ef3
MR
264 for (i = 0; i < 64; i++)
265 block[i] = block1[i];
9e1586fc
FB
266 }
267
4f905a65 268 dct->func(block);
aadd27cd 269 mmx_emms();
9e1586fc 270
4f905a65 271 if (dct->format == SCALE_PERM) {
36fa9ef3
MR
272 for (i = 0; i < 64; i++) {
273 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
274 block[i] = (block[i] * scale) >> AANSCALE_BITS;
86748dbc
MN
275 }
276 }
277
4f905a65 278 dct->ref(block1);
de6d9b64 279
36fa9ef3
MR
280 blockSumErr = 0;
281 for (i = 0; i < 64; i++) {
dbf396d4
MR
282 int err = block[i] - block1[i];
283 err_sum += err;
284 v = abs(err);
de6d9b64
FB
285 if (v > err_inf)
286 err_inf = v;
287 err2 += v * v;
bb270c08
DB
288 sysErr[i] += block[i] - block1[i];
289 blockSumErr += v;
36fa9ef3
MR
290 if (abs(block[i]) > maxout)
291 maxout = abs(block[i]);
de6d9b64 292 }
36fa9ef3
MR
293 if (blockSumErrMax < blockSumErr)
294 blockSumErrMax = blockSumErr;
86748dbc 295 }
36fa9ef3
MR
296 for (i = 0; i < 64; i++)
297 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
115329f1 298
36fa9ef3
MR
299 for (i = 0; i < 64; i++) {
300 if (i % 8 == 0)
301 printf("\n");
302 printf("%7d ", (int) sysErr[i]);
de6d9b64 303 }
86748dbc 304 printf("\n");
115329f1 305
dbf396d4
MR
306 omse = (double) err2 / NB_ITS / 64;
307 ome = (double) err_sum / NB_ITS / 64;
308
309 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
310
311 printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
cf2b4f88 312 is_idct ? "IDCT" : "DCT", dct->name, err_inf,
dbf396d4 313 omse, ome, (double) sysErrMax / NB_ITS,
36fa9ef3 314 maxout, blockSumErrMax);
e6ff0648 315
dbf396d4
MR
316 if (spec_err && !dct->nonspec)
317 return 1;
318
7fd2c138 319 if (!speed)
dbf396d4 320 return 0;
7fd2c138 321
de6d9b64 322 /* speed test */
36fa9ef3 323 for (i = 0; i < 64; i++)
86748dbc 324 block1[i] = 0;
36fa9ef3
MR
325
326 switch (test) {
115329f1 327 case 0:
36fa9ef3
MR
328 for (i = 0; i < 64; i++)
329 block1[i] = av_lfg_get(&prng) % 512 - 256;
cf2b4f88 330 if (is_idct) {
0de74546 331 ff_ref_fdct(block1);
36fa9ef3
MR
332 for (i = 0; i < 64; i++)
333 block1[i] >>= 3;
ad324c93 334 }
36fa9ef3
MR
335 break;
336 case 1:
86748dbc 337 case 2:
36fa9ef3
MR
338 block1[0] = av_lfg_get(&prng) % 512 - 256;
339 block1[1] = av_lfg_get(&prng) % 512 - 256;
340 block1[2] = av_lfg_get(&prng) % 512 - 256;
341 block1[3] = av_lfg_get(&prng) % 512 - 256;
342 break;
86748dbc 343 }
de6d9b64 344
4f905a65 345 if (dct->format == MMX_PERM) {
36fa9ef3 346 for (i = 0; i < 64; i++)
9e1586fc 347 block[idct_mmx_perm[i]] = block1[i];
4f905a65 348 } else if (dct->format == MMX_SIMPLE_PERM) {
36fa9ef3 349 for (i = 0; i < 64; i++)
86748dbc
MN
350 block[idct_simple_mmx_perm[i]] = block1[i];
351 } else {
36fa9ef3
MR
352 for (i = 0; i < 64; i++)
353 block[i] = block1[i];
9e1586fc
FB
354 }
355
de6d9b64
FB
356 ti = gettime();
357 it1 = 0;
358 do {
36fa9ef3
MR
359 for (it = 0; it < NB_ITS_SPEED; it++) {
360 for (i = 0; i < 64; i++)
361 block[i] = block1[i];
4f905a65 362 dct->func(block);
de6d9b64
FB
363 }
364 it1 += NB_ITS_SPEED;
365 ti1 = gettime() - ti;
366 } while (ti1 < 1000000);
aadd27cd 367 mmx_emms();
de6d9b64 368
cf2b4f88 369 printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
36fa9ef3 370 (double) it1 * 1000.0 / (double) ti1);
dbf396d4
MR
371
372 return 0;
de6d9b64
FB
373}
374
c6727809
MR
375DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
376DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
a46a3ce4 377
504ffed1 378static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
379{
380 static int init;
381 static double c8[8][8];
382 static double c4[4][4];
383 double block1[64], block2[64], block3[64];
384 double s, sum, v;
385 int i, j, k;
386
387 if (!init) {
388 init = 1;
389
36fa9ef3 390 for (i = 0; i < 8; i++) {
a46a3ce4 391 sum = 0;
36fa9ef3
MR
392 for (j = 0; j < 8; j++) {
393 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
a46a3ce4
FB
394 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
395 sum += c8[i][j] * c8[i][j];
396 }
397 }
115329f1 398
36fa9ef3 399 for (i = 0; i < 4; i++) {
a46a3ce4 400 sum = 0;
36fa9ef3
MR
401 for (j = 0; j < 4; j++) {
402 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
a46a3ce4
FB
403 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
404 sum += c4[i][j] * c4[i][j];
405 }
406 }
407 }
408
409 /* butterfly */
652f0197 410 s = 0.5 * sqrt(2.0);
36fa9ef3
MR
411 for (i = 0; i < 4; i++) {
412 for (j = 0; j < 8; j++) {
413 block1[8 * (2 * i) + j] =
414 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
415 block1[8 * (2 * i + 1) + j] =
416 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
a46a3ce4
FB
417 }
418 }
419
420 /* idct8 on lines */
36fa9ef3
MR
421 for (i = 0; i < 8; i++) {
422 for (j = 0; j < 8; j++) {
a46a3ce4 423 sum = 0;
36fa9ef3
MR
424 for (k = 0; k < 8; k++)
425 sum += c8[k][j] * block1[8 * i + k];
426 block2[8 * i + j] = sum;
a46a3ce4
FB
427 }
428 }
429
430 /* idct4 */
36fa9ef3
MR
431 for (i = 0; i < 8; i++) {
432 for (j = 0; j < 4; j++) {
a46a3ce4
FB
433 /* top */
434 sum = 0;
36fa9ef3
MR
435 for (k = 0; k < 4; k++)
436 sum += c4[k][j] * block2[8 * (2 * k) + i];
437 block3[8 * (2 * j) + i] = sum;
a46a3ce4
FB
438
439 /* bottom */
440 sum = 0;
36fa9ef3
MR
441 for (k = 0; k < 4; k++)
442 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
443 block3[8 * (2 * j + 1) + i] = sum;
a46a3ce4
FB
444 }
445 }
446
447 /* clamp and store the result */
36fa9ef3
MR
448 for (i = 0; i < 8; i++) {
449 for (j = 0; j < 8; j++) {
450 v = block3[8 * i + j];
451 if (v < 0) v = 0;
452 else if (v > 255) v = 255;
453 dest[i * linesize + j] = (int) rint(v);
a46a3ce4
FB
454 }
455 }
456}
457
504ffed1 458static void idct248_error(const char *name,
36fa9ef3 459 void (*idct248_put)(uint8_t *dest, int line_size,
7fd2c138
MR
460 int16_t *block),
461 int speed)
a46a3ce4
FB
462{
463 int it, i, it1, ti, ti1, err_max, v;
64bde197 464 AVLFG prng;
294eaa26 465
64bde197 466 av_lfg_init(&prng, 1);
115329f1 467
a46a3ce4
FB
468 /* just one test to see if code is correct (precision is less
469 important here) */
470 err_max = 0;
36fa9ef3 471 for (it = 0; it < NB_ITS; it++) {
652f0197 472 /* XXX: use forward transform to generate values */
36fa9ef3 473 for (i = 0; i < 64; i++)
64bde197 474 block1[i] = av_lfg_get(&prng) % 256 - 128;
652f0197
FB
475 block1[0] += 1024;
476
36fa9ef3
MR
477 for (i = 0; i < 64; i++)
478 block[i] = block1[i];
a46a3ce4 479 idct248_ref(img_dest1, 8, block);
115329f1 480
36fa9ef3
MR
481 for (i = 0; i < 64; i++)
482 block[i] = block1[i];
652f0197 483 idct248_put(img_dest, 8, block);
115329f1 484
36fa9ef3
MR
485 for (i = 0; i < 64; i++) {
486 v = abs((int) img_dest[i] - (int) img_dest1[i]);
652f0197
FB
487 if (v == 255)
488 printf("%d %d\n", img_dest[i], img_dest1[i]);
489 if (v > err_max)
490 err_max = v;
491 }
a46a3ce4 492 }
36fa9ef3 493 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
a46a3ce4 494
7fd2c138
MR
495 if (!speed)
496 return;
497
a46a3ce4
FB
498 ti = gettime();
499 it1 = 0;
500 do {
36fa9ef3
MR
501 for (it = 0; it < NB_ITS_SPEED; it++) {
502 for (i = 0; i < 64; i++)
503 block[i] = block1[i];
a46a3ce4
FB
504 idct248_put(img_dest, 8, block);
505 }
506 it1 += NB_ITS_SPEED;
507 ti1 = gettime() - ti;
508 } while (ti1 < 1000000);
aadd27cd 509 mmx_emms();
a46a3ce4 510
36fa9ef3
MR
511 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
512 (double) it1 * 1000.0 / (double) ti1);
a46a3ce4
FB
513}
514
/** Print the command line usage summary to stdout. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    printf("%s", usage);
}
525
de6d9b64
FB
526int main(int argc, char **argv)
527{
a46a3ce4 528 int test_idct = 0, test_248_dct = 0;
36fa9ef3
MR
529 int c, i;
530 int test = 1;
7fd2c138 531 int speed = 0;
dbf396d4 532 int err = 0;
36fa9ef3 533
c6c98d08 534 cpu_flags = av_get_cpu_flags();
9e1586fc 535
0de74546 536 ff_ref_dct_init();
9e1586fc 537 idct_mmx_init();
f67a10cd 538
36fa9ef3
MR
539 for (i = 0; i < 256; i++)
540 cropTbl[i + MAX_NEG_CROP] = i;
541 for (i = 0; i < MAX_NEG_CROP; i++) {
486497e0
MR
542 cropTbl[i] = 0;
543 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 544 }
115329f1 545
36fa9ef3 546 for (;;) {
7fd2c138 547 c = getopt(argc, argv, "ih4t");
9e1586fc
FB
548 if (c == -1)
549 break;
36fa9ef3 550 switch (c) {
9e1586fc
FB
551 case 'i':
552 test_idct = 1;
553 break;
a46a3ce4
FB
554 case '4':
555 test_248_dct = 1;
556 break;
7fd2c138
MR
557 case 't':
558 speed = 1;
559 break;
36fa9ef3 560 default:
9e1586fc
FB
561 case 'h':
562 help();
c6bdc908 563 return 0;
9e1586fc
FB
564 }
565 }
115329f1 566
36fa9ef3
MR
567 if (optind < argc)
568 test = atoi(argv[optind]);
115329f1 569
9e1586fc
FB
570 printf("ffmpeg DCT/IDCT test\n");
571
a46a3ce4 572 if (test_248_dct) {
7fd2c138 573 idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
9e1586fc 574 } else {
4b357756 575 const struct algo *algos = test_idct ? idct_tab : fdct_tab;
36fa9ef3 576 for (i = 0; algos[i].name; i++)
4b357756 577 if (!(~cpu_flags & algos[i].mm_support)) {
dbf396d4 578 err |= dct_error(&algos[i], test, test_idct, speed);
36fa9ef3 579 }
9e1586fc 580 }
dbf396d4
MR
581
582 return err;
de6d9b64 583}