Rename the 'put_zero' parameter of ff_put_string() to
[libav.git] / libavcodec / dct-test.c
CommitLineData
/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
/**
 * @file libavcodec/dct-test.c
 * DCT test (c) 2001 Fabrice Bellard
 * Started from sample code by Juan J. Sierralta P.
 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
12807c8d 33#include <math.h>
de6d9b64 34
ae32e509 35#include "libavutil/common.h"
294eaa26 36#include "libavutil/lfg.h"
de6d9b64 37
86748dbc 38#include "simple_idct.h"
10ac3618 39#include "aandcttab.h"
65e4c8c9 40#include "faandct.h"
6f08c541 41#include "faanidct.h"
a6493a8f 42#include "x86/idct_xvid.h"
9e1586fc 43
#undef printf

/**
 * Stand-in for fast_memcpy() that simply forwards to the C library memcpy().
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  the destination pointer, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
47
9e1586fc 48/* reference fdct/idct */
0de74546
DY
49void ff_ref_fdct(DCTELEM *block);
50void ff_ref_idct(DCTELEM *block);
51void ff_ref_dct_init(void);
de6d9b64 52
9686df2b
DB
53void ff_mmx_idct(DCTELEM *data);
54void ff_mmxext_idct(DCTELEM *data);
9e1586fc 55
9686df2b 56void odivx_idct_c(short *block);
86748dbc 57
3ac35bdb 58// BFIN
9686df2b
DB
59void ff_bfin_idct(DCTELEM *block);
60void ff_bfin_fdct(DCTELEM *block);
3ac35bdb
MH
61
62// ALTIVEC
9686df2b
DB
63void fdct_altivec(DCTELEM *block);
64//void idct_altivec(DCTELEM *block);?? no routine
3ac35bdb 65
479044ce
MR
66// ARM
67void j_rev_dct_ARM(DCTELEM *data);
68void simple_idct_ARM(DCTELEM *data);
69void simple_idct_armv5te(DCTELEM *data);
70void ff_simple_idct_armv6(DCTELEM *data);
71void ff_simple_idct_neon(DCTELEM *data);
3ac35bdb 72
2a839eeb
MR
73void ff_simple_idct_axp(DCTELEM *data);
74
3ac35bdb 75struct algo {
f5b67781 76 const char *name;
3ac35bdb
MH
77 enum { FDCT, IDCT } is_idct;
78 void (* func) (DCTELEM *block);
79 void (* ref) (DCTELEM *block);
875f3125 80 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
3794b928 81 int mm_support;
3ac35bdb
MH
82};
83
/* Format used for the FAAN forward DCT entry: its output is rescaled by
 * dct_error() (SCALE_PERM) unless FAAN_POSTSCALE is defined. */
#ifndef FAAN_POSTSCALE
#define FAAN_SCALE SCALE_PERM
#else
#define FAAN_SCALE NO_PERM
#endif

/* CPU capability flags; set once from mm_support() at the start of main(). */
static int cpu_flags;
91
3ac35bdb 92struct algo algos[] = {
0de74546
DY
93 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
94 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
95 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
96 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
97 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
98 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
99 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
100 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
3ac35bdb 101
b250f9c6 102#if HAVE_MMX
0de74546 103 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX},
b250f9c6 104#if HAVE_MMX2
0de74546
DY
105 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMXEXT},
106 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2},
94254fc0 107#endif
3ac35bdb 108
b250f9c6 109#if CONFIG_GPL
0de74546
DY
110 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX},
111 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMXEXT},
b9702de5 112#endif
0de74546
DY
113 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
114 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX},
115 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMXEXT},
116 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
3ac35bdb
MH
117#endif
118
b250f9c6 119#if HAVE_ALTIVEC
0de74546 120 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
3ac35bdb
MH
121#endif
122
b250f9c6 123#if ARCH_BFIN
0de74546
DY
124 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
125 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
3ac35bdb
MH
126#endif
127
b250f9c6 128#if ARCH_ARM
0de74546
DY
129 {"SIMPLE-ARM", 1, simple_idct_ARM, ff_ref_idct, NO_PERM },
130 {"INT-ARM", 1, j_rev_dct_ARM, ff_ref_idct, MMX_PERM },
b250f9c6 131#if HAVE_ARMV5TE
0de74546 132 {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
479044ce 133#endif
b250f9c6 134#if HAVE_ARMV6
0de74546 135 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
479044ce 136#endif
b250f9c6 137#if HAVE_NEON
0de74546 138 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
479044ce 139#endif
a2fc0f6a 140#endif /* ARCH_ARM */
479044ce 141
2a839eeb 142#if ARCH_ALPHA
0de74546 143 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
2a839eeb
MR
144#endif
145
3ac35bdb
MH
146 { 0 }
147};
148
de6d9b64 149#define AANSCALE_BITS 12
de6d9b64 150
486497e0 151uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 152
/**
 * Read the current time of day.
 *
 * @return the gettimeofday() timestamp expressed in microseconds
 */
static int64_t gettime(void)
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    /* widen before multiplying so the product is computed in 64 bits */
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}
159
/* Number of blocks used for the accuracy measurement. */
#define NB_ITS 20000
/* Number of transform calls per batch in the speed measurement. */
#define NB_ITS_SPEED 50000

/* Coefficient permutation for MMX_PERM; computed by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Coefficient permutation for MMX_SIMPLE_PERM: input coefficient i is
 * stored at position idct_simple_mmx_perm[i]. */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Within-row column permutation for SSE2_PERM (the row index is kept). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
177
504ffed1 178static void idct_mmx_init(void)
9e1586fc
FB
179{
180 int i;
181
182 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
183 for (i = 0; i < 64; i++) {
bb270c08
DB
184 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
185// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
186 }
187}
188
55727a83 189static DCTELEM block[64] __attribute__ ((aligned (16)));
9e1586fc 190static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 191static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc 192
/**
 * Issue the x86 `emms` instruction when built with MMX support and the
 * running CPU reports MMX in cpu_flags; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (cpu_flags & FF_MM_MMX)
        __asm__ volatile ("emms\n\t");
#endif
}
200
504ffed1 201static void dct_error(const char *name, int is_idct,
9e1586fc 202 void (*fdct_func)(DCTELEM *block),
3ac35bdb 203 void (*fdct_ref)(DCTELEM *block), int form, int test)
de6d9b64
FB
204{
205 int it, i, scale;
de6d9b64 206 int err_inf, v;
0c1a9eda
ZK
207 int64_t err2, ti, ti1, it1;
208 int64_t sysErr[64], sysErrMax=0;
86748dbc 209 int maxout=0;
86748dbc 210 int blockSumErrMax=0, blockSumErr;
64bde197 211 AVLFG prng;
de6d9b64 212
64bde197 213 av_lfg_init(&prng, 1);
de6d9b64
FB
214
215 err_inf = 0;
216 err2 = 0;
86748dbc 217 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 218 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
219 for(i=0;i<64;i++)
220 block1[i] = 0;
221 switch(test){
115329f1 222 case 0:
86748dbc 223 for(i=0;i<64;i++)
64bde197 224 block1[i] = (av_lfg_get(&prng) % 512) -256;
ad324c93 225 if (is_idct){
0de74546 226 ff_ref_fdct(block1);
ad324c93
MN
227
228 for(i=0;i<64;i++)
229 block1[i]>>=3;
230 }
86748dbc
MN
231 break;
232 case 1:{
64bde197 233 int num = av_lfg_get(&prng) % 10 + 1;
86748dbc 234 for(i=0;i<num;i++)
64bde197 235 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
236 }break;
237 case 2:
64bde197 238 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
86748dbc
MN
239 block1[63]= (block1[0]&1)^1;
240 break;
241 }
9e1586fc 242
86748dbc
MN
243#if 0 // simulate mismatch control
244{ int sum=0;
245 for(i=0;i<64;i++)
246 sum+=block1[i];
247
115329f1 248 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
249}
250#endif
251
252 for(i=0; i<64; i++)
253 block_org[i]= block1[i];
9e1586fc 254
3ac35bdb 255 if (form == MMX_PERM) {
86748dbc 256 for(i=0;i<64;i++)
9e1586fc 257 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 258 } else if (form == MMX_SIMPLE_PERM) {
86748dbc
MN
259 for(i=0;i<64;i++)
260 block[idct_simple_mmx_perm[i]] = block1[i];
261
ad246860
AS
262 } else if (form == SSE2_PERM) {
263 for(i=0; i<64; i++)
264 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
875f3125
MR
265 } else if (form == PARTTRANS_PERM) {
266 for(i=0; i<64; i++)
267 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
bb270c08 268 } else {
86748dbc
MN
269 for(i=0; i<64; i++)
270 block[i]= block1[i];
9e1586fc 271 }
86748dbc
MN
272#if 0 // simulate mismatch control for tested IDCT but not the ref
273{ int sum=0;
274 for(i=0;i<64;i++)
275 sum+=block[i];
276
115329f1 277 if((sum&1)==0) block[63]^=1;
86748dbc
MN
278}
279#endif
9e1586fc 280
de6d9b64 281 fdct_func(block);
aadd27cd 282 mmx_emms();
9e1586fc 283
3ac35bdb 284 if (form == SCALE_PERM) {
de6d9b64 285 for(i=0; i<64; i++) {
10ac3618 286 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
86748dbc
MN
287 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
288 }
289 }
290
9e1586fc 291 fdct_ref(block1);
de6d9b64 292
86748dbc 293 blockSumErr=0;
de6d9b64
FB
294 for(i=0;i<64;i++) {
295 v = abs(block[i] - block1[i]);
296 if (v > err_inf)
297 err_inf = v;
298 err2 += v * v;
bb270c08
DB
299 sysErr[i] += block[i] - block1[i];
300 blockSumErr += v;
301 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 302 }
86748dbc
MN
303 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
304#if 0 // print different matrix pairs
305 if(blockSumErr){
306 printf("\n");
307 for(i=0; i<64; i++){
308 if((i&7)==0) printf("\n");
309 printf("%4d ", block_org[i]);
310 }
311 for(i=0; i<64; i++){
312 if((i&7)==0) printf("\n");
313 printf("%4d ", block[i] - block1[i]);
314 }
315 }
316#endif
317 }
ae32e509 318 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
115329f1 319
86748dbc
MN
320#if 1 // dump systematic errors
321 for(i=0; i<64; i++){
bb270c08 322 if(i%8==0) printf("\n");
2029e934 323 printf("%7d ", (int)sysErr[i]);
de6d9b64 324 }
86748dbc
MN
325 printf("\n");
326#endif
115329f1 327
86748dbc 328 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 329 is_idct ? "IDCT" : "DCT",
86748dbc
MN
330 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
331#if 1 //Speed test
de6d9b64 332 /* speed test */
86748dbc
MN
333 for(i=0;i<64;i++)
334 block1[i] = 0;
335 switch(test){
115329f1 336 case 0:
86748dbc 337 for(i=0;i<64;i++)
64bde197 338 block1[i] = av_lfg_get(&prng) % 512 -256;
ad324c93 339 if (is_idct){
0de74546 340 ff_ref_fdct(block1);
ad324c93
MN
341
342 for(i=0;i<64;i++)
343 block1[i]>>=3;
344 }
86748dbc
MN
345 break;
346 case 1:{
347 case 2:
64bde197
DB
348 block1[0] = av_lfg_get(&prng) % 512 -256;
349 block1[1] = av_lfg_get(&prng) % 512 -256;
350 block1[2] = av_lfg_get(&prng) % 512 -256;
351 block1[3] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
352 }break;
353 }
de6d9b64 354
3ac35bdb 355 if (form == MMX_PERM) {
86748dbc 356 for(i=0;i<64;i++)
9e1586fc 357 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 358 } else if(form == MMX_SIMPLE_PERM) {
86748dbc
MN
359 for(i=0;i<64;i++)
360 block[idct_simple_mmx_perm[i]] = block1[i];
361 } else {
362 for(i=0; i<64; i++)
363 block[i]= block1[i];
9e1586fc
FB
364 }
365
de6d9b64
FB
366 ti = gettime();
367 it1 = 0;
368 do {
369 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
370 for(i=0; i<64; i++)
371 block[i]= block1[i];
372// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 373// do not memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
374 fdct_func(block);
375 }
376 it1 += NB_ITS_SPEED;
377 ti1 = gettime() - ti;
378 } while (ti1 < 1000000);
aadd27cd 379 mmx_emms();
de6d9b64 380
86748dbc 381 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 382 is_idct ? "IDCT" : "DCT",
de6d9b64 383 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 384#endif
de6d9b64
FB
385}
386
/* 8x8 output pictures used by idct248_error(): img_dest receives the tested
 * implementation's result, img_dest1 the reference result. */
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
a46a3ce4 389
504ffed1 390static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
391{
392 static int init;
393 static double c8[8][8];
394 static double c4[4][4];
395 double block1[64], block2[64], block3[64];
396 double s, sum, v;
397 int i, j, k;
398
399 if (!init) {
400 init = 1;
401
402 for(i=0;i<8;i++) {
403 sum = 0;
404 for(j=0;j<8;j++) {
405 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
406 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
407 sum += c8[i][j] * c8[i][j];
408 }
409 }
115329f1 410
a46a3ce4
FB
411 for(i=0;i<4;i++) {
412 sum = 0;
413 for(j=0;j<4;j++) {
414 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
415 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
416 sum += c4[i][j] * c4[i][j];
417 }
418 }
419 }
420
421 /* butterfly */
652f0197 422 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
423 for(i=0;i<4;i++) {
424 for(j=0;j<8;j++) {
652f0197
FB
425 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
426 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
427 }
428 }
429
430 /* idct8 on lines */
431 for(i=0;i<8;i++) {
432 for(j=0;j<8;j++) {
433 sum = 0;
434 for(k=0;k<8;k++)
435 sum += c8[k][j] * block1[8*i+k];
436 block2[8*i+j] = sum;
437 }
438 }
439
440 /* idct4 */
441 for(i=0;i<8;i++) {
442 for(j=0;j<4;j++) {
443 /* top */
444 sum = 0;
445 for(k=0;k<4;k++)
446 sum += c4[k][j] * block2[8*(2*k)+i];
447 block3[8*(2*j)+i] = sum;
448
449 /* bottom */
450 sum = 0;
451 for(k=0;k<4;k++)
452 sum += c4[k][j] * block2[8*(2*k+1)+i];
453 block3[8*(2*j+1)+i] = sum;
454 }
455 }
456
457 /* clamp and store the result */
458 for(i=0;i<8;i++) {
459 for(j=0;j<8;j++) {
652f0197 460 v = block3[8*i+j];
a46a3ce4
FB
461 if (v < 0)
462 v = 0;
463 else if (v > 255)
464 v = 255;
465 dest[i * linesize + j] = (int)rint(v);
466 }
467 }
468}
469
504ffed1 470static void idct248_error(const char *name,
0c1a9eda 471 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
472{
473 int it, i, it1, ti, ti1, err_max, v;
474
64bde197 475 AVLFG prng;
294eaa26 476
64bde197 477 av_lfg_init(&prng, 1);
115329f1 478
a46a3ce4
FB
479 /* just one test to see if code is correct (precision is less
480 important here) */
481 err_max = 0;
482 for(it=0;it<NB_ITS;it++) {
115329f1 483
652f0197
FB
484 /* XXX: use forward transform to generate values */
485 for(i=0;i<64;i++)
64bde197 486 block1[i] = av_lfg_get(&prng) % 256 - 128;
652f0197
FB
487 block1[0] += 1024;
488
a46a3ce4
FB
489 for(i=0; i<64; i++)
490 block[i]= block1[i];
491 idct248_ref(img_dest1, 8, block);
115329f1 492
652f0197
FB
493 for(i=0; i<64; i++)
494 block[i]= block1[i];
495 idct248_put(img_dest, 8, block);
115329f1 496
652f0197
FB
497 for(i=0;i<64;i++) {
498 v = abs((int)img_dest[i] - (int)img_dest1[i]);
499 if (v == 255)
500 printf("%d %d\n", img_dest[i], img_dest1[i]);
501 if (v > err_max)
502 err_max = v;
503 }
a46a3ce4
FB
504#if 0
505 printf("ref=\n");
506 for(i=0;i<8;i++) {
507 int j;
508 for(j=0;j<8;j++) {
509 printf(" %3d", img_dest1[i*8+j]);
510 }
511 printf("\n");
512 }
115329f1 513
a46a3ce4
FB
514 printf("out=\n");
515 for(i=0;i<8;i++) {
516 int j;
517 for(j=0;j<8;j++) {
518 printf(" %3d", img_dest[i*8+j]);
519 }
520 printf("\n");
521 }
522#endif
a46a3ce4
FB
523 }
524 printf("%s %s: err_inf=%d\n",
525 1 ? "IDCT248" : "DCT248",
526 name, err_max);
527
528 ti = gettime();
529 it1 = 0;
530 do {
531 for(it=0;it<NB_ITS_SPEED;it++) {
532 for(i=0; i<64; i++)
533 block[i]= block1[i];
534// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 535// do not memcpy especially not fastmemcpy because it does movntq !!!
a46a3ce4
FB
536 idct248_put(img_dest, 8, block);
537 }
538 it1 += NB_ITS_SPEED;
539 ti1 = gettime() - ti;
540 } while (ti1 < 1000000);
aadd27cd 541 mmx_emms();
a46a3ce4
FB
542
543 printf("%s %s: %0.1f kdct/s\n",
544 1 ? "IDCT248" : "DCT248",
545 name, (double)it1 * 1000.0 / (double)ti1);
546}
547
/**
 * Print the command-line usage summary to standard output.
 *
 * The synopsis lists every option accepted by the getopt() string in main().
 */
static void help(void)
{
    printf("dct-test [-h] [-i] [-4] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-h          print this help\n");
}
557
de6d9b64
FB
558int main(int argc, char **argv)
559{
a46a3ce4 560 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
561 int c,i;
562 int test=1;
aadd27cd 563 cpu_flags = mm_support();
9e1586fc 564
0de74546 565 ff_ref_dct_init();
9e1586fc 566 idct_mmx_init();
f67a10cd 567
486497e0 568 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 569 for(i=0;i<MAX_NEG_CROP;i++) {
486497e0
MR
570 cropTbl[i] = 0;
571 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 572 }
115329f1 573
9e1586fc 574 for(;;) {
a46a3ce4 575 c = getopt(argc, argv, "ih4");
9e1586fc
FB
576 if (c == -1)
577 break;
578 switch(c) {
579 case 'i':
580 test_idct = 1;
581 break;
a46a3ce4
FB
582 case '4':
583 test_248_dct = 1;
584 break;
86748dbc 585 default :
9e1586fc
FB
586 case 'h':
587 help();
c6bdc908 588 return 0;
9e1586fc
FB
589 }
590 }
115329f1 591
86748dbc 592 if(optind <argc) test= atoi(argv[optind]);
115329f1 593
9e1586fc
FB
594 printf("ffmpeg DCT/IDCT test\n");
595
a46a3ce4 596 if (test_248_dct) {
59e6f60a 597 idct248_error("SIMPLE-C", ff_simple_idct248_put);
9e1586fc 598 } else {
3ac35bdb 599 for (i=0;algos[i].name;i++)
dafe8824 600 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
3ac35bdb 601 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
a46a3ce4 602 }
9e1586fc 603 }
de6d9b64
FB
604 return 0;
605}