Add missing internal.h #include for brktimegm(), fixes the warning:
[libav.git] / libavcodec / dct-test.c
CommitLineData
/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
/**
 * @file libavcodec/dct-test.c
 * DCT test (c) 2001 Fabrice Bellard
 * Started from sample code by Juan J. Sierralta P.
 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
12807c8d 33#include <math.h>
de6d9b64 34
ae32e509 35#include "libavutil/common.h"
294eaa26 36#include "libavutil/lfg.h"
de6d9b64 37
86748dbc 38#include "simple_idct.h"
10ac3618 39#include "aandcttab.h"
65e4c8c9 40#include "faandct.h"
6f08c541 41#include "faanidct.h"
a6493a8f 42#include "x86/idct_xvid.h"
6a813295 43#include "dctref.h"
9e1586fc 44
434df899
MN
45#undef printf
46
9686df2b
DB
47void ff_mmx_idct(DCTELEM *data);
48void ff_mmxext_idct(DCTELEM *data);
9e1586fc 49
9686df2b 50void odivx_idct_c(short *block);
86748dbc 51
3ac35bdb 52// BFIN
9686df2b
DB
53void ff_bfin_idct(DCTELEM *block);
54void ff_bfin_fdct(DCTELEM *block);
3ac35bdb
MH
55
56// ALTIVEC
9686df2b
DB
57void fdct_altivec(DCTELEM *block);
58//void idct_altivec(DCTELEM *block);?? no routine
3ac35bdb 59
479044ce 60// ARM
0926c009
MR
61void ff_j_rev_dct_arm(DCTELEM *data);
62void ff_simple_idct_arm(DCTELEM *data);
63void ff_simple_idct_armv5te(DCTELEM *data);
479044ce
MR
64void ff_simple_idct_armv6(DCTELEM *data);
65void ff_simple_idct_neon(DCTELEM *data);
3ac35bdb 66
2a839eeb
MR
67void ff_simple_idct_axp(DCTELEM *data);
68
3ac35bdb 69struct algo {
f5b67781 70 const char *name;
3ac35bdb
MH
71 enum { FDCT, IDCT } is_idct;
72 void (* func) (DCTELEM *block);
73 void (* ref) (DCTELEM *block);
875f3125 74 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
3794b928 75 int mm_support;
3ac35bdb
MH
76};
77
78#ifndef FAAN_POSTSCALE
79#define FAAN_SCALE SCALE_PERM
80#else
81#define FAAN_SCALE NO_PERM
82#endif
83
aadd27cd
MN
84static int cpu_flags;
85
3ac35bdb 86struct algo algos[] = {
0de74546
DY
87 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
88 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
89 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
90 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
91 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
92 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
93 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
94 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
3ac35bdb 95
b250f9c6 96#if HAVE_MMX
0de74546 97 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX},
b250f9c6 98#if HAVE_MMX2
147a90a3 99 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMX2},
0de74546 100 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2},
94254fc0 101#endif
3ac35bdb 102
b250f9c6 103#if CONFIG_GPL
0de74546 104 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX},
147a90a3 105 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX2},
b9702de5 106#endif
0de74546
DY
107 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
108 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX},
147a90a3 109 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMX2},
0de74546 110 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
3ac35bdb
MH
111#endif
112
b250f9c6 113#if HAVE_ALTIVEC
0de74546 114 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
3ac35bdb
MH
115#endif
116
b250f9c6 117#if ARCH_BFIN
0de74546
DY
118 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
119 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
3ac35bdb
MH
120#endif
121
b250f9c6 122#if ARCH_ARM
0926c009
MR
123 {"SIMPLE-ARM", 1, ff_simple_idct_arm, ff_ref_idct, NO_PERM },
124 {"INT-ARM", 1, ff_j_rev_dct_arm, ff_ref_idct, MMX_PERM },
b250f9c6 125#if HAVE_ARMV5TE
0926c009 126 {"SIMPLE-ARMV5TE", 1, ff_simple_idct_armv5te, ff_ref_idct, NO_PERM },
479044ce 127#endif
b250f9c6 128#if HAVE_ARMV6
0de74546 129 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
479044ce 130#endif
b250f9c6 131#if HAVE_NEON
0de74546 132 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
479044ce 133#endif
a2fc0f6a 134#endif /* ARCH_ARM */
479044ce 135
2a839eeb 136#if ARCH_ALPHA
0de74546 137 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
2a839eeb
MR
138#endif
139
3ac35bdb
MH
140 { 0 }
141};
142
de6d9b64 143#define AANSCALE_BITS 12
de6d9b64 144
486497e0 145uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 146
/**
 * Return the current time of day in microseconds, as reported by
 * gettimeofday().
 *
 * The seconds field is widened to 64 bits before scaling so that the
 * multiplication is carried out in int64_t.
 */
static int64_t gettime(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);
    return (int64_t)now.tv_sec * 1000000 + now.tv_usec;
}
153
/* Number of blocks processed by the accuracy test. */
#define NB_ITS 20000
/* Number of transforms per batch between clock reads in the speed test. */
#define NB_ITS_SPEED 50000

/* Coefficient permutation for MMX_PERM implementations; computed at run
 * time by idct_mmx_init(). Entry i is the destination index of input i. */
static short idct_mmx_perm[64];

/* Coefficient permutation for MMX_SIMPLE_PERM implementations. The table is
 * precomputed and never modified, hence const. */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column permutation applied within each row for SSE2_PERM implementations. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };

/**
 * Fill idct_mmx_perm[].
 *
 * The mmx/mmxext idct uses a reordered input: the row bits (0x38) are kept,
 * and within a row the low three column bits are rotated right by one
 * (bits 2..1 move down to 1..0, bit 0 moves up to bit 2).
 */
static void idct_mmx_init(void)
{
    int i;

    for (i = 0; i < 64; i++)
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
}
182
c6727809
MR
183DECLARE_ALIGNED(16, static DCTELEM, block)[64];
184DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
185DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
9e1586fc 186
aadd27cd
MN
187static inline void mmx_emms(void)
188{
b250f9c6 189#if HAVE_MMX
82d1605f 190 if (cpu_flags & FF_MM_MMX)
be449fca 191 __asm__ volatile ("emms\n\t");
aadd27cd
MN
192#endif
193}
194
504ffed1 195static void dct_error(const char *name, int is_idct,
9e1586fc 196 void (*fdct_func)(DCTELEM *block),
3ac35bdb 197 void (*fdct_ref)(DCTELEM *block), int form, int test)
de6d9b64
FB
198{
199 int it, i, scale;
de6d9b64 200 int err_inf, v;
0c1a9eda
ZK
201 int64_t err2, ti, ti1, it1;
202 int64_t sysErr[64], sysErrMax=0;
86748dbc 203 int maxout=0;
86748dbc 204 int blockSumErrMax=0, blockSumErr;
64bde197 205 AVLFG prng;
de6d9b64 206
64bde197 207 av_lfg_init(&prng, 1);
de6d9b64
FB
208
209 err_inf = 0;
210 err2 = 0;
86748dbc 211 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 212 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
213 for(i=0;i<64;i++)
214 block1[i] = 0;
215 switch(test){
115329f1 216 case 0:
86748dbc 217 for(i=0;i<64;i++)
64bde197 218 block1[i] = (av_lfg_get(&prng) % 512) -256;
ad324c93 219 if (is_idct){
0de74546 220 ff_ref_fdct(block1);
ad324c93
MN
221
222 for(i=0;i<64;i++)
223 block1[i]>>=3;
224 }
86748dbc
MN
225 break;
226 case 1:{
64bde197 227 int num = av_lfg_get(&prng) % 10 + 1;
86748dbc 228 for(i=0;i<num;i++)
64bde197 229 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
230 }break;
231 case 2:
64bde197 232 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
86748dbc
MN
233 block1[63]= (block1[0]&1)^1;
234 break;
235 }
9e1586fc 236
86748dbc
MN
237#if 0 // simulate mismatch control
238{ int sum=0;
239 for(i=0;i<64;i++)
240 sum+=block1[i];
241
115329f1 242 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
243}
244#endif
245
246 for(i=0; i<64; i++)
247 block_org[i]= block1[i];
9e1586fc 248
3ac35bdb 249 if (form == MMX_PERM) {
86748dbc 250 for(i=0;i<64;i++)
9e1586fc 251 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 252 } else if (form == MMX_SIMPLE_PERM) {
86748dbc
MN
253 for(i=0;i<64;i++)
254 block[idct_simple_mmx_perm[i]] = block1[i];
255
ad246860
AS
256 } else if (form == SSE2_PERM) {
257 for(i=0; i<64; i++)
258 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
875f3125
MR
259 } else if (form == PARTTRANS_PERM) {
260 for(i=0; i<64; i++)
261 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
bb270c08 262 } else {
86748dbc
MN
263 for(i=0; i<64; i++)
264 block[i]= block1[i];
9e1586fc 265 }
86748dbc
MN
266#if 0 // simulate mismatch control for tested IDCT but not the ref
267{ int sum=0;
268 for(i=0;i<64;i++)
269 sum+=block[i];
270
115329f1 271 if((sum&1)==0) block[63]^=1;
86748dbc
MN
272}
273#endif
9e1586fc 274
de6d9b64 275 fdct_func(block);
aadd27cd 276 mmx_emms();
9e1586fc 277
3ac35bdb 278 if (form == SCALE_PERM) {
de6d9b64 279 for(i=0; i<64; i++) {
10ac3618 280 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
86748dbc
MN
281 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
282 }
283 }
284
9e1586fc 285 fdct_ref(block1);
de6d9b64 286
86748dbc 287 blockSumErr=0;
de6d9b64
FB
288 for(i=0;i<64;i++) {
289 v = abs(block[i] - block1[i]);
290 if (v > err_inf)
291 err_inf = v;
292 err2 += v * v;
bb270c08
DB
293 sysErr[i] += block[i] - block1[i];
294 blockSumErr += v;
295 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 296 }
86748dbc
MN
297 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
298#if 0 // print different matrix pairs
299 if(blockSumErr){
300 printf("\n");
301 for(i=0; i<64; i++){
302 if((i&7)==0) printf("\n");
303 printf("%4d ", block_org[i]);
304 }
305 for(i=0; i<64; i++){
306 if((i&7)==0) printf("\n");
307 printf("%4d ", block[i] - block1[i]);
308 }
309 }
310#endif
311 }
ae32e509 312 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
115329f1 313
86748dbc
MN
314#if 1 // dump systematic errors
315 for(i=0; i<64; i++){
bb270c08 316 if(i%8==0) printf("\n");
2029e934 317 printf("%7d ", (int)sysErr[i]);
de6d9b64 318 }
86748dbc
MN
319 printf("\n");
320#endif
115329f1 321
86748dbc 322 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 323 is_idct ? "IDCT" : "DCT",
86748dbc
MN
324 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
325#if 1 //Speed test
de6d9b64 326 /* speed test */
86748dbc
MN
327 for(i=0;i<64;i++)
328 block1[i] = 0;
329 switch(test){
115329f1 330 case 0:
86748dbc 331 for(i=0;i<64;i++)
64bde197 332 block1[i] = av_lfg_get(&prng) % 512 -256;
ad324c93 333 if (is_idct){
0de74546 334 ff_ref_fdct(block1);
ad324c93
MN
335
336 for(i=0;i<64;i++)
337 block1[i]>>=3;
338 }
86748dbc
MN
339 break;
340 case 1:{
341 case 2:
64bde197
DB
342 block1[0] = av_lfg_get(&prng) % 512 -256;
343 block1[1] = av_lfg_get(&prng) % 512 -256;
344 block1[2] = av_lfg_get(&prng) % 512 -256;
345 block1[3] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
346 }break;
347 }
de6d9b64 348
3ac35bdb 349 if (form == MMX_PERM) {
86748dbc 350 for(i=0;i<64;i++)
9e1586fc 351 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 352 } else if(form == MMX_SIMPLE_PERM) {
86748dbc
MN
353 for(i=0;i<64;i++)
354 block[idct_simple_mmx_perm[i]] = block1[i];
355 } else {
356 for(i=0; i<64; i++)
357 block[i]= block1[i];
9e1586fc
FB
358 }
359
de6d9b64
FB
360 ti = gettime();
361 it1 = 0;
362 do {
363 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
364 for(i=0; i<64; i++)
365 block[i]= block1[i];
366// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 367// do not memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
368 fdct_func(block);
369 }
370 it1 += NB_ITS_SPEED;
371 ti1 = gettime() - ti;
372 } while (ti1 < 1000000);
aadd27cd 373 mmx_emms();
de6d9b64 374
86748dbc 375 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 376 is_idct ? "IDCT" : "DCT",
de6d9b64 377 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 378#endif
de6d9b64
FB
379}
380
c6727809
MR
381DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
382DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
a46a3ce4 383
504ffed1 384static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
385{
386 static int init;
387 static double c8[8][8];
388 static double c4[4][4];
389 double block1[64], block2[64], block3[64];
390 double s, sum, v;
391 int i, j, k;
392
393 if (!init) {
394 init = 1;
395
396 for(i=0;i<8;i++) {
397 sum = 0;
398 for(j=0;j<8;j++) {
399 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
400 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
401 sum += c8[i][j] * c8[i][j];
402 }
403 }
115329f1 404
a46a3ce4
FB
405 for(i=0;i<4;i++) {
406 sum = 0;
407 for(j=0;j<4;j++) {
408 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
409 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
410 sum += c4[i][j] * c4[i][j];
411 }
412 }
413 }
414
415 /* butterfly */
652f0197 416 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
417 for(i=0;i<4;i++) {
418 for(j=0;j<8;j++) {
652f0197
FB
419 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
420 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
421 }
422 }
423
424 /* idct8 on lines */
425 for(i=0;i<8;i++) {
426 for(j=0;j<8;j++) {
427 sum = 0;
428 for(k=0;k<8;k++)
429 sum += c8[k][j] * block1[8*i+k];
430 block2[8*i+j] = sum;
431 }
432 }
433
434 /* idct4 */
435 for(i=0;i<8;i++) {
436 for(j=0;j<4;j++) {
437 /* top */
438 sum = 0;
439 for(k=0;k<4;k++)
440 sum += c4[k][j] * block2[8*(2*k)+i];
441 block3[8*(2*j)+i] = sum;
442
443 /* bottom */
444 sum = 0;
445 for(k=0;k<4;k++)
446 sum += c4[k][j] * block2[8*(2*k+1)+i];
447 block3[8*(2*j+1)+i] = sum;
448 }
449 }
450
451 /* clamp and store the result */
452 for(i=0;i<8;i++) {
453 for(j=0;j<8;j++) {
652f0197 454 v = block3[8*i+j];
a46a3ce4
FB
455 if (v < 0)
456 v = 0;
457 else if (v > 255)
458 v = 255;
459 dest[i * linesize + j] = (int)rint(v);
460 }
461 }
462}
463
504ffed1 464static void idct248_error(const char *name,
0c1a9eda 465 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
466{
467 int it, i, it1, ti, ti1, err_max, v;
468
64bde197 469 AVLFG prng;
294eaa26 470
64bde197 471 av_lfg_init(&prng, 1);
115329f1 472
a46a3ce4
FB
473 /* just one test to see if code is correct (precision is less
474 important here) */
475 err_max = 0;
476 for(it=0;it<NB_ITS;it++) {
115329f1 477
652f0197
FB
478 /* XXX: use forward transform to generate values */
479 for(i=0;i<64;i++)
64bde197 480 block1[i] = av_lfg_get(&prng) % 256 - 128;
652f0197
FB
481 block1[0] += 1024;
482
a46a3ce4
FB
483 for(i=0; i<64; i++)
484 block[i]= block1[i];
485 idct248_ref(img_dest1, 8, block);
115329f1 486
652f0197
FB
487 for(i=0; i<64; i++)
488 block[i]= block1[i];
489 idct248_put(img_dest, 8, block);
115329f1 490
652f0197
FB
491 for(i=0;i<64;i++) {
492 v = abs((int)img_dest[i] - (int)img_dest1[i]);
493 if (v == 255)
494 printf("%d %d\n", img_dest[i], img_dest1[i]);
495 if (v > err_max)
496 err_max = v;
497 }
a46a3ce4
FB
498#if 0
499 printf("ref=\n");
500 for(i=0;i<8;i++) {
501 int j;
502 for(j=0;j<8;j++) {
503 printf(" %3d", img_dest1[i*8+j]);
504 }
505 printf("\n");
506 }
115329f1 507
a46a3ce4
FB
508 printf("out=\n");
509 for(i=0;i<8;i++) {
510 int j;
511 for(j=0;j<8;j++) {
512 printf(" %3d", img_dest[i*8+j]);
513 }
514 printf("\n");
515 }
516#endif
a46a3ce4
FB
517 }
518 printf("%s %s: err_inf=%d\n",
519 1 ? "IDCT248" : "DCT248",
520 name, err_max);
521
522 ti = gettime();
523 it1 = 0;
524 do {
525 for(it=0;it<NB_ITS_SPEED;it++) {
526 for(i=0; i<64; i++)
527 block[i]= block1[i];
528// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 529// do not memcpy especially not fastmemcpy because it does movntq !!!
a46a3ce4
FB
530 idct248_put(img_dest, 8, block);
531 }
532 it1 += NB_ITS_SPEED;
533 ti1 = gettime() - ti;
534 } while (ti1 < 1000000);
aadd27cd 535 mmx_emms();
a46a3ce4
FB
536
537 printf("%s %s: %0.1f kdct/s\n",
538 1 ? "IDCT248" : "DCT248",
539 name, (double)it1 * 1000.0 / (double)ti1);
540}
541
/**
 * Print the command-line usage summary to standard output.
 */
static void help(void)
{
    printf("dct-test [-i] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n");
}
551
de6d9b64
FB
552int main(int argc, char **argv)
553{
a46a3ce4 554 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
555 int c,i;
556 int test=1;
aadd27cd 557 cpu_flags = mm_support();
9e1586fc 558
0de74546 559 ff_ref_dct_init();
9e1586fc 560 idct_mmx_init();
f67a10cd 561
486497e0 562 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 563 for(i=0;i<MAX_NEG_CROP;i++) {
486497e0
MR
564 cropTbl[i] = 0;
565 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 566 }
115329f1 567
9e1586fc 568 for(;;) {
a46a3ce4 569 c = getopt(argc, argv, "ih4");
9e1586fc
FB
570 if (c == -1)
571 break;
572 switch(c) {
573 case 'i':
574 test_idct = 1;
575 break;
a46a3ce4
FB
576 case '4':
577 test_248_dct = 1;
578 break;
86748dbc 579 default :
9e1586fc
FB
580 case 'h':
581 help();
c6bdc908 582 return 0;
9e1586fc
FB
583 }
584 }
115329f1 585
86748dbc 586 if(optind <argc) test= atoi(argv[optind]);
115329f1 587
9e1586fc
FB
588 printf("ffmpeg DCT/IDCT test\n");
589
a46a3ce4 590 if (test_248_dct) {
59e6f60a 591 idct248_error("SIMPLE-C", ff_simple_idct248_put);
9e1586fc 592 } else {
3ac35bdb 593 for (i=0;algos[i].name;i++)
dafe8824 594 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
3ac35bdb 595 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
a46a3ce4 596 }
9e1586fc 597 }
de6d9b64
FB
598 return 0;
599}