/* [libav.git] / libavcodec / dct-test.c -- last commit: "get rid of the last svn mentions" */
/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DCT test (c) 2001 Fabrice Bellard
 * Started from sample code by Juan J. Sierralta P.
 */

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <math.h>

#include "libavutil/cpu.h"
#include "libavutil/common.h"
#include "libavutil/lfg.h"

#include "simple_idct.h"
#include "aandcttab.h"
#include "faandct.h"
#include "faanidct.h"
#include "x86/idct_xvid.h"
#include "dctref.h"

434df899
MN
46#undef printf
47
9686df2b
DB
48void ff_mmx_idct(DCTELEM *data);
49void ff_mmxext_idct(DCTELEM *data);
9e1586fc 50
9686df2b 51void odivx_idct_c(short *block);
86748dbc 52
3ac35bdb 53// BFIN
9686df2b
DB
54void ff_bfin_idct(DCTELEM *block);
55void ff_bfin_fdct(DCTELEM *block);
3ac35bdb
MH
56
57// ALTIVEC
9686df2b
DB
58void fdct_altivec(DCTELEM *block);
59//void idct_altivec(DCTELEM *block);?? no routine
3ac35bdb 60
479044ce 61// ARM
0926c009
MR
62void ff_j_rev_dct_arm(DCTELEM *data);
63void ff_simple_idct_arm(DCTELEM *data);
64void ff_simple_idct_armv5te(DCTELEM *data);
479044ce
MR
65void ff_simple_idct_armv6(DCTELEM *data);
66void ff_simple_idct_neon(DCTELEM *data);
3ac35bdb 67
2a839eeb
MR
68void ff_simple_idct_axp(DCTELEM *data);
69
3ac35bdb 70struct algo {
f5b67781 71 const char *name;
3ac35bdb
MH
72 enum { FDCT, IDCT } is_idct;
73 void (* func) (DCTELEM *block);
74 void (* ref) (DCTELEM *block);
875f3125 75 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
3794b928 76 int mm_support;
3ac35bdb
MH
77};
78
/*
 * Format tag used for the "FAAN" entry of algos[]: without FAAN_POSTSCALE
 * the entry is tagged SCALE_PERM, so dct_error() rescales its output with
 * ff_aanscales[] before comparing; with FAAN_POSTSCALE defined no rescaling
 * is applied.
 */
#ifndef FAAN_POSTSCALE
#define FAAN_SCALE SCALE_PERM
#else
#define FAAN_SCALE NO_PERM
#endif

/* CPU capability flags; set from av_get_cpu_flags() at the start of main(). */
static int cpu_flags;

3ac35bdb 87struct algo algos[] = {
0de74546
DY
88 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
89 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
90 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
91 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
92 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
93 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
94 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
95 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
3ac35bdb 96
b250f9c6 97#if HAVE_MMX
7160bb71 98 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX},
b250f9c6 99#if HAVE_MMX2
7160bb71
SS
100 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX2},
101 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_SSE2},
94254fc0 102#endif
3ac35bdb 103
b250f9c6 104#if CONFIG_GPL
7160bb71
SS
105 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX},
106 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX2},
b9702de5 107#endif
7160bb71
SS
108 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX},
109 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX},
110 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX2},
111 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, AV_CPU_FLAG_SSE2},
3ac35bdb
MH
112#endif
113
b250f9c6 114#if HAVE_ALTIVEC
7160bb71 115 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_ALTIVEC},
3ac35bdb
MH
116#endif
117
b250f9c6 118#if ARCH_BFIN
0de74546
DY
119 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
120 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
3ac35bdb
MH
121#endif
122
b250f9c6 123#if ARCH_ARM
0926c009
MR
124 {"SIMPLE-ARM", 1, ff_simple_idct_arm, ff_ref_idct, NO_PERM },
125 {"INT-ARM", 1, ff_j_rev_dct_arm, ff_ref_idct, MMX_PERM },
b250f9c6 126#if HAVE_ARMV5TE
0926c009 127 {"SIMPLE-ARMV5TE", 1, ff_simple_idct_armv5te, ff_ref_idct, NO_PERM },
479044ce 128#endif
b250f9c6 129#if HAVE_ARMV6
0de74546 130 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
479044ce 131#endif
b250f9c6 132#if HAVE_NEON
0de74546 133 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
479044ce 134#endif
a2fc0f6a 135#endif /* ARCH_ARM */
479044ce 136
2a839eeb 137#if ARCH_ALPHA
0de74546 138 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
2a839eeb
MR
139#endif
140
3ac35bdb
MH
141 { 0 }
142};
143
de6d9b64 144#define AANSCALE_BITS 12
de6d9b64 145
486497e0 146uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 147
/**
 * Read the current time of day.
 *
 * @return the gettimeofday() time expressed in microseconds
 */
static int64_t gettime(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    /* widen before multiplying so the product is computed in 64 bits */
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}

/* Number of random blocks compared against the reference per accuracy test. */
#define NB_ITS 20000
/* Number of transform calls per timing batch in the speed tests. */
#define NB_ITS_SPEED 50000

/* Input permutation for MMX_PERM implementations; filled by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM implementations:
 * coefficient i of the natural-order block is stored at index table[i]. */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column reordering within each row for SSE2_PERM implementations. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

504ffed1 173static void idct_mmx_init(void)
9e1586fc
FB
174{
175 int i;
176
177 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
178 for (i = 0; i < 64; i++) {
bb270c08
DB
179 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
180// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
181 }
182}
183
c6727809
MR
184DECLARE_ALIGNED(16, static DCTELEM, block)[64];
185DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
186DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
9e1586fc 187
/**
 * Execute the x86 "emms" instruction when built with HAVE_MMX and the
 * running CPU reports AV_CPU_FLAG_MMX; otherwise do nothing.
 * Called after running a transform under test, before results are evaluated
 * or printed.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (cpu_flags & AV_CPU_FLAG_MMX)
        __asm__ volatile ("emms\n\t");
#endif
}

504ffed1 196static void dct_error(const char *name, int is_idct,
9e1586fc 197 void (*fdct_func)(DCTELEM *block),
3ac35bdb 198 void (*fdct_ref)(DCTELEM *block), int form, int test)
de6d9b64
FB
199{
200 int it, i, scale;
de6d9b64 201 int err_inf, v;
0c1a9eda
ZK
202 int64_t err2, ti, ti1, it1;
203 int64_t sysErr[64], sysErrMax=0;
86748dbc 204 int maxout=0;
86748dbc 205 int blockSumErrMax=0, blockSumErr;
64bde197 206 AVLFG prng;
de6d9b64 207
64bde197 208 av_lfg_init(&prng, 1);
de6d9b64
FB
209
210 err_inf = 0;
211 err2 = 0;
86748dbc 212 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 213 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
214 for(i=0;i<64;i++)
215 block1[i] = 0;
216 switch(test){
115329f1 217 case 0:
86748dbc 218 for(i=0;i<64;i++)
64bde197 219 block1[i] = (av_lfg_get(&prng) % 512) -256;
ad324c93 220 if (is_idct){
0de74546 221 ff_ref_fdct(block1);
ad324c93
MN
222
223 for(i=0;i<64;i++)
224 block1[i]>>=3;
225 }
86748dbc
MN
226 break;
227 case 1:{
64bde197 228 int num = av_lfg_get(&prng) % 10 + 1;
86748dbc 229 for(i=0;i<num;i++)
64bde197 230 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
231 }break;
232 case 2:
64bde197 233 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
86748dbc
MN
234 block1[63]= (block1[0]&1)^1;
235 break;
236 }
9e1586fc 237
86748dbc
MN
238#if 0 // simulate mismatch control
239{ int sum=0;
240 for(i=0;i<64;i++)
241 sum+=block1[i];
242
115329f1 243 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
244}
245#endif
246
247 for(i=0; i<64; i++)
248 block_org[i]= block1[i];
9e1586fc 249
3ac35bdb 250 if (form == MMX_PERM) {
86748dbc 251 for(i=0;i<64;i++)
9e1586fc 252 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 253 } else if (form == MMX_SIMPLE_PERM) {
86748dbc
MN
254 for(i=0;i<64;i++)
255 block[idct_simple_mmx_perm[i]] = block1[i];
256
ad246860
AS
257 } else if (form == SSE2_PERM) {
258 for(i=0; i<64; i++)
259 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
875f3125
MR
260 } else if (form == PARTTRANS_PERM) {
261 for(i=0; i<64; i++)
262 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
bb270c08 263 } else {
86748dbc
MN
264 for(i=0; i<64; i++)
265 block[i]= block1[i];
9e1586fc 266 }
86748dbc
MN
267#if 0 // simulate mismatch control for tested IDCT but not the ref
268{ int sum=0;
269 for(i=0;i<64;i++)
270 sum+=block[i];
271
115329f1 272 if((sum&1)==0) block[63]^=1;
86748dbc
MN
273}
274#endif
9e1586fc 275
de6d9b64 276 fdct_func(block);
aadd27cd 277 mmx_emms();
9e1586fc 278
3ac35bdb 279 if (form == SCALE_PERM) {
de6d9b64 280 for(i=0; i<64; i++) {
10ac3618 281 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
86748dbc
MN
282 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
283 }
284 }
285
9e1586fc 286 fdct_ref(block1);
de6d9b64 287
86748dbc 288 blockSumErr=0;
de6d9b64
FB
289 for(i=0;i<64;i++) {
290 v = abs(block[i] - block1[i]);
291 if (v > err_inf)
292 err_inf = v;
293 err2 += v * v;
bb270c08
DB
294 sysErr[i] += block[i] - block1[i];
295 blockSumErr += v;
296 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 297 }
86748dbc
MN
298 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
299#if 0 // print different matrix pairs
300 if(blockSumErr){
301 printf("\n");
302 for(i=0; i<64; i++){
303 if((i&7)==0) printf("\n");
304 printf("%4d ", block_org[i]);
305 }
306 for(i=0; i<64; i++){
307 if((i&7)==0) printf("\n");
308 printf("%4d ", block[i] - block1[i]);
309 }
310 }
311#endif
312 }
ae32e509 313 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
115329f1 314
86748dbc
MN
315#if 1 // dump systematic errors
316 for(i=0; i<64; i++){
bb270c08 317 if(i%8==0) printf("\n");
2029e934 318 printf("%7d ", (int)sysErr[i]);
de6d9b64 319 }
86748dbc
MN
320 printf("\n");
321#endif
115329f1 322
86748dbc 323 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 324 is_idct ? "IDCT" : "DCT",
86748dbc
MN
325 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
326#if 1 //Speed test
de6d9b64 327 /* speed test */
86748dbc
MN
328 for(i=0;i<64;i++)
329 block1[i] = 0;
330 switch(test){
115329f1 331 case 0:
86748dbc 332 for(i=0;i<64;i++)
64bde197 333 block1[i] = av_lfg_get(&prng) % 512 -256;
ad324c93 334 if (is_idct){
0de74546 335 ff_ref_fdct(block1);
ad324c93
MN
336
337 for(i=0;i<64;i++)
338 block1[i]>>=3;
339 }
86748dbc
MN
340 break;
341 case 1:{
342 case 2:
64bde197
DB
343 block1[0] = av_lfg_get(&prng) % 512 -256;
344 block1[1] = av_lfg_get(&prng) % 512 -256;
345 block1[2] = av_lfg_get(&prng) % 512 -256;
346 block1[3] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
347 }break;
348 }
de6d9b64 349
3ac35bdb 350 if (form == MMX_PERM) {
86748dbc 351 for(i=0;i<64;i++)
9e1586fc 352 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 353 } else if(form == MMX_SIMPLE_PERM) {
86748dbc
MN
354 for(i=0;i<64;i++)
355 block[idct_simple_mmx_perm[i]] = block1[i];
356 } else {
357 for(i=0; i<64; i++)
358 block[i]= block1[i];
9e1586fc
FB
359 }
360
de6d9b64
FB
361 ti = gettime();
362 it1 = 0;
363 do {
364 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
365 for(i=0; i<64; i++)
366 block[i]= block1[i];
367// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 368// do not memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
369 fdct_func(block);
370 }
371 it1 += NB_ITS_SPEED;
372 ti1 = gettime() - ti;
373 } while (ti1 < 1000000);
aadd27cd 374 mmx_emms();
de6d9b64 375
86748dbc 376 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 377 is_idct ? "IDCT" : "DCT",
de6d9b64 378 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 379#endif
de6d9b64
FB
380}
381
c6727809
MR
382DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
383DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
a46a3ce4 384
504ffed1 385static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
386{
387 static int init;
388 static double c8[8][8];
389 static double c4[4][4];
390 double block1[64], block2[64], block3[64];
391 double s, sum, v;
392 int i, j, k;
393
394 if (!init) {
395 init = 1;
396
397 for(i=0;i<8;i++) {
398 sum = 0;
399 for(j=0;j<8;j++) {
400 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
401 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
402 sum += c8[i][j] * c8[i][j];
403 }
404 }
115329f1 405
a46a3ce4
FB
406 for(i=0;i<4;i++) {
407 sum = 0;
408 for(j=0;j<4;j++) {
409 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
410 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
411 sum += c4[i][j] * c4[i][j];
412 }
413 }
414 }
415
416 /* butterfly */
652f0197 417 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
418 for(i=0;i<4;i++) {
419 for(j=0;j<8;j++) {
652f0197
FB
420 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
421 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
422 }
423 }
424
425 /* idct8 on lines */
426 for(i=0;i<8;i++) {
427 for(j=0;j<8;j++) {
428 sum = 0;
429 for(k=0;k<8;k++)
430 sum += c8[k][j] * block1[8*i+k];
431 block2[8*i+j] = sum;
432 }
433 }
434
435 /* idct4 */
436 for(i=0;i<8;i++) {
437 for(j=0;j<4;j++) {
438 /* top */
439 sum = 0;
440 for(k=0;k<4;k++)
441 sum += c4[k][j] * block2[8*(2*k)+i];
442 block3[8*(2*j)+i] = sum;
443
444 /* bottom */
445 sum = 0;
446 for(k=0;k<4;k++)
447 sum += c4[k][j] * block2[8*(2*k+1)+i];
448 block3[8*(2*j+1)+i] = sum;
449 }
450 }
451
452 /* clamp and store the result */
453 for(i=0;i<8;i++) {
454 for(j=0;j<8;j++) {
652f0197 455 v = block3[8*i+j];
a46a3ce4
FB
456 if (v < 0)
457 v = 0;
458 else if (v > 255)
459 v = 255;
460 dest[i * linesize + j] = (int)rint(v);
461 }
462 }
463}
464
504ffed1 465static void idct248_error(const char *name,
0c1a9eda 466 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
467{
468 int it, i, it1, ti, ti1, err_max, v;
469
64bde197 470 AVLFG prng;
294eaa26 471
64bde197 472 av_lfg_init(&prng, 1);
115329f1 473
a46a3ce4
FB
474 /* just one test to see if code is correct (precision is less
475 important here) */
476 err_max = 0;
477 for(it=0;it<NB_ITS;it++) {
115329f1 478
652f0197
FB
479 /* XXX: use forward transform to generate values */
480 for(i=0;i<64;i++)
64bde197 481 block1[i] = av_lfg_get(&prng) % 256 - 128;
652f0197
FB
482 block1[0] += 1024;
483
a46a3ce4
FB
484 for(i=0; i<64; i++)
485 block[i]= block1[i];
486 idct248_ref(img_dest1, 8, block);
115329f1 487
652f0197
FB
488 for(i=0; i<64; i++)
489 block[i]= block1[i];
490 idct248_put(img_dest, 8, block);
115329f1 491
652f0197
FB
492 for(i=0;i<64;i++) {
493 v = abs((int)img_dest[i] - (int)img_dest1[i]);
494 if (v == 255)
495 printf("%d %d\n", img_dest[i], img_dest1[i]);
496 if (v > err_max)
497 err_max = v;
498 }
a46a3ce4
FB
499#if 0
500 printf("ref=\n");
501 for(i=0;i<8;i++) {
502 int j;
503 for(j=0;j<8;j++) {
504 printf(" %3d", img_dest1[i*8+j]);
505 }
506 printf("\n");
507 }
115329f1 508
a46a3ce4
FB
509 printf("out=\n");
510 for(i=0;i<8;i++) {
511 int j;
512 for(j=0;j<8;j++) {
513 printf(" %3d", img_dest[i*8+j]);
514 }
515 printf("\n");
516 }
517#endif
a46a3ce4
FB
518 }
519 printf("%s %s: err_inf=%d\n",
520 1 ? "IDCT248" : "DCT248",
521 name, err_max);
522
523 ti = gettime();
524 it1 = 0;
525 do {
526 for(it=0;it<NB_ITS_SPEED;it++) {
527 for(i=0; i<64; i++)
528 block[i]= block1[i];
529// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 530// do not memcpy especially not fastmemcpy because it does movntq !!!
a46a3ce4
FB
531 idct248_put(img_dest, 8, block);
532 }
533 it1 += NB_ITS_SPEED;
534 ti1 = gettime() - ti;
535 } while (ti1 < 1000000);
aadd27cd 536 mmx_emms();
a46a3ce4
FB
537
538 printf("%s %s: %0.1f kdct/s\n",
539 1 ? "IDCT248" : "DCT248",
540 name, (double)it1 * 1000.0 / (double)ti1);
541}
542
/** Print the command-line usage summary to stdout. */
static void help(void)
{
    printf("dct-test [-i] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n");
}

de6d9b64
FB
553int main(int argc, char **argv)
554{
a46a3ce4 555 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
556 int c,i;
557 int test=1;
c6c98d08 558 cpu_flags = av_get_cpu_flags();
9e1586fc 559
0de74546 560 ff_ref_dct_init();
9e1586fc 561 idct_mmx_init();
f67a10cd 562
486497e0 563 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 564 for(i=0;i<MAX_NEG_CROP;i++) {
486497e0
MR
565 cropTbl[i] = 0;
566 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 567 }
115329f1 568
9e1586fc 569 for(;;) {
a46a3ce4 570 c = getopt(argc, argv, "ih4");
9e1586fc
FB
571 if (c == -1)
572 break;
573 switch(c) {
574 case 'i':
575 test_idct = 1;
576 break;
a46a3ce4
FB
577 case '4':
578 test_248_dct = 1;
579 break;
86748dbc 580 default :
9e1586fc
FB
581 case 'h':
582 help();
c6bdc908 583 return 0;
9e1586fc
FB
584 }
585 }
115329f1 586
86748dbc 587 if(optind <argc) test= atoi(argv[optind]);
115329f1 588
9e1586fc
FB
589 printf("ffmpeg DCT/IDCT test\n");
590
a46a3ce4 591 if (test_248_dct) {
59e6f60a 592 idct248_error("SIMPLE-C", ff_simple_idct248_put);
9e1586fc 593 } else {
3ac35bdb 594 for (i=0;algos[i].name;i++)
dafe8824 595 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
3ac35bdb 596 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
a46a3ce4 597 }
9e1586fc 598 }
de6d9b64
FB
599 return 0;
600}