Move dctref prototypes to a header file
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
/**
 * @file libavcodec/dct-test.c
 * DCT test (c) 2001 Fabrice Bellard
 * Started from sample code by Juan J. Sierralta P.
 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
12807c8d 33#include <math.h>
de6d9b64 34
ae32e509 35#include "libavutil/common.h"
294eaa26 36#include "libavutil/lfg.h"
de6d9b64 37
86748dbc 38#include "simple_idct.h"
10ac3618 39#include "aandcttab.h"
65e4c8c9 40#include "faandct.h"
6f08c541 41#include "faanidct.h"
a6493a8f 42#include "x86/idct_xvid.h"
6a813295 43#include "dctref.h"
9e1586fc 44
434df899
MN
45#undef printf
46
/**
 * Plain-memcpy stand-in for the fast_memcpy symbol.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  a, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
48
9686df2b
DB
49void ff_mmx_idct(DCTELEM *data);
50void ff_mmxext_idct(DCTELEM *data);
9e1586fc 51
9686df2b 52void odivx_idct_c(short *block);
86748dbc 53
3ac35bdb 54// BFIN
9686df2b
DB
55void ff_bfin_idct(DCTELEM *block);
56void ff_bfin_fdct(DCTELEM *block);
3ac35bdb
MH
57
58// ALTIVEC
9686df2b
DB
59void fdct_altivec(DCTELEM *block);
60//void idct_altivec(DCTELEM *block);?? no routine
3ac35bdb 61
479044ce
MR
62// ARM
63void j_rev_dct_ARM(DCTELEM *data);
64void simple_idct_ARM(DCTELEM *data);
65void simple_idct_armv5te(DCTELEM *data);
66void ff_simple_idct_armv6(DCTELEM *data);
67void ff_simple_idct_neon(DCTELEM *data);
3ac35bdb 68
2a839eeb
MR
69void ff_simple_idct_axp(DCTELEM *data);
70
3ac35bdb 71struct algo {
f5b67781 72 const char *name;
3ac35bdb
MH
73 enum { FDCT, IDCT } is_idct;
74 void (* func) (DCTELEM *block);
75 void (* ref) (DCTELEM *block);
875f3125 76 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
3794b928 77 int mm_support;
3ac35bdb
MH
78};
79
80#ifndef FAAN_POSTSCALE
81#define FAAN_SCALE SCALE_PERM
82#else
83#define FAAN_SCALE NO_PERM
84#endif
85
aadd27cd
MN
86static int cpu_flags;
87
3ac35bdb 88struct algo algos[] = {
0de74546
DY
89 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
90 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
91 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
92 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
93 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
94 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
95 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
96 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
3ac35bdb 97
b250f9c6 98#if HAVE_MMX
0de74546 99 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX},
b250f9c6 100#if HAVE_MMX2
147a90a3 101 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMX2},
0de74546 102 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2},
94254fc0 103#endif
3ac35bdb 104
b250f9c6 105#if CONFIG_GPL
0de74546 106 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX},
147a90a3 107 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX2},
b9702de5 108#endif
0de74546
DY
109 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
110 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX},
147a90a3 111 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMX2},
0de74546 112 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
3ac35bdb
MH
113#endif
114
b250f9c6 115#if HAVE_ALTIVEC
0de74546 116 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
3ac35bdb
MH
117#endif
118
b250f9c6 119#if ARCH_BFIN
0de74546
DY
120 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
121 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
3ac35bdb
MH
122#endif
123
b250f9c6 124#if ARCH_ARM
0de74546
DY
125 {"SIMPLE-ARM", 1, simple_idct_ARM, ff_ref_idct, NO_PERM },
126 {"INT-ARM", 1, j_rev_dct_ARM, ff_ref_idct, MMX_PERM },
b250f9c6 127#if HAVE_ARMV5TE
0de74546 128 {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
479044ce 129#endif
b250f9c6 130#if HAVE_ARMV6
0de74546 131 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
479044ce 132#endif
b250f9c6 133#if HAVE_NEON
0de74546 134 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
479044ce 135#endif
a2fc0f6a 136#endif /* ARCH_ARM */
479044ce 137
2a839eeb 138#if ARCH_ALPHA
0de74546 139 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
2a839eeb
MR
140#endif
141
3ac35bdb
MH
142 { 0 }
143};
144
de6d9b64 145#define AANSCALE_BITS 12
de6d9b64 146
486497e0 147uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 148
/**
 * Read the wall clock through gettimeofday().
 *
 * @return seconds and microseconds combined into one microsecond count
 */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);
    /* widen before multiplying so the seconds part cannot overflow */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
155
156#define NB_ITS 20000
157#define NB_ITS_SPEED 50000
158
9e1586fc
FB
/* destination index for each coefficient in MMX_PERM layout;
 * filled in by idct_mmx_init() */
static short idct_mmx_perm[64];

/* destination index for each coefficient in MMX_SIMPLE_PERM layout */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09,   0x01, 0x0c, 0x05, 0x0d,
    0x10, 0x18, 0x14, 0x19,   0x11, 0x1c, 0x15, 0x1d,
    0x20, 0x28, 0x24, 0x29,   0x21, 0x2c, 0x25, 0x2d,
    0x12, 0x1a, 0x16, 0x1b,   0x13, 0x1e, 0x17, 0x1f,
    0x02, 0x0a, 0x06, 0x0b,   0x03, 0x0e, 0x07, 0x0f,
    0x30, 0x38, 0x34, 0x39,   0x31, 0x3c, 0x35, 0x3d,
    0x22, 0x2a, 0x26, 0x2b,   0x23, 0x2e, 0x27, 0x2f,
    0x32, 0x3a, 0x36, 0x3b,   0x33, 0x3e, 0x37, 0x3f,
};

/* destination column for each source column in SSE2_PERM layout
 * (rows stay in place) */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5, 2, 6, 3, 7
};
173
504ffed1 174static void idct_mmx_init(void)
9e1586fc
FB
175{
176 int i;
177
178 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
179 for (i = 0; i < 64; i++) {
bb270c08
DB
180 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
181// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
182 }
183}
184
c6727809
MR
185DECLARE_ALIGNED(16, static DCTELEM, block)[64];
186DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
187DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
9e1586fc 188
aadd27cd
MN
/**
 * Issue the x86 "emms" instruction when built with MMX support and the
 * running CPU reports MMX in cpu_flags; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & FF_MM_MMX))
        return;
    __asm__ volatile ("emms\n\t");
#endif
}
196
504ffed1 197static void dct_error(const char *name, int is_idct,
9e1586fc 198 void (*fdct_func)(DCTELEM *block),
3ac35bdb 199 void (*fdct_ref)(DCTELEM *block), int form, int test)
de6d9b64
FB
200{
201 int it, i, scale;
de6d9b64 202 int err_inf, v;
0c1a9eda
ZK
203 int64_t err2, ti, ti1, it1;
204 int64_t sysErr[64], sysErrMax=0;
86748dbc 205 int maxout=0;
86748dbc 206 int blockSumErrMax=0, blockSumErr;
64bde197 207 AVLFG prng;
de6d9b64 208
64bde197 209 av_lfg_init(&prng, 1);
de6d9b64
FB
210
211 err_inf = 0;
212 err2 = 0;
86748dbc 213 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 214 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
215 for(i=0;i<64;i++)
216 block1[i] = 0;
217 switch(test){
115329f1 218 case 0:
86748dbc 219 for(i=0;i<64;i++)
64bde197 220 block1[i] = (av_lfg_get(&prng) % 512) -256;
ad324c93 221 if (is_idct){
0de74546 222 ff_ref_fdct(block1);
ad324c93
MN
223
224 for(i=0;i<64;i++)
225 block1[i]>>=3;
226 }
86748dbc
MN
227 break;
228 case 1:{
64bde197 229 int num = av_lfg_get(&prng) % 10 + 1;
86748dbc 230 for(i=0;i<num;i++)
64bde197 231 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
232 }break;
233 case 2:
64bde197 234 block1[0] = av_lfg_get(&prng) % 4096 - 2048;
86748dbc
MN
235 block1[63]= (block1[0]&1)^1;
236 break;
237 }
9e1586fc 238
86748dbc
MN
239#if 0 // simulate mismatch control
240{ int sum=0;
241 for(i=0;i<64;i++)
242 sum+=block1[i];
243
115329f1 244 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
245}
246#endif
247
248 for(i=0; i<64; i++)
249 block_org[i]= block1[i];
9e1586fc 250
3ac35bdb 251 if (form == MMX_PERM) {
86748dbc 252 for(i=0;i<64;i++)
9e1586fc 253 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 254 } else if (form == MMX_SIMPLE_PERM) {
86748dbc
MN
255 for(i=0;i<64;i++)
256 block[idct_simple_mmx_perm[i]] = block1[i];
257
ad246860
AS
258 } else if (form == SSE2_PERM) {
259 for(i=0; i<64; i++)
260 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
875f3125
MR
261 } else if (form == PARTTRANS_PERM) {
262 for(i=0; i<64; i++)
263 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
bb270c08 264 } else {
86748dbc
MN
265 for(i=0; i<64; i++)
266 block[i]= block1[i];
9e1586fc 267 }
86748dbc
MN
268#if 0 // simulate mismatch control for tested IDCT but not the ref
269{ int sum=0;
270 for(i=0;i<64;i++)
271 sum+=block[i];
272
115329f1 273 if((sum&1)==0) block[63]^=1;
86748dbc
MN
274}
275#endif
9e1586fc 276
de6d9b64 277 fdct_func(block);
aadd27cd 278 mmx_emms();
9e1586fc 279
3ac35bdb 280 if (form == SCALE_PERM) {
de6d9b64 281 for(i=0; i<64; i++) {
10ac3618 282 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
86748dbc
MN
283 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
284 }
285 }
286
9e1586fc 287 fdct_ref(block1);
de6d9b64 288
86748dbc 289 blockSumErr=0;
de6d9b64
FB
290 for(i=0;i<64;i++) {
291 v = abs(block[i] - block1[i]);
292 if (v > err_inf)
293 err_inf = v;
294 err2 += v * v;
bb270c08
DB
295 sysErr[i] += block[i] - block1[i];
296 blockSumErr += v;
297 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 298 }
86748dbc
MN
299 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
300#if 0 // print different matrix pairs
301 if(blockSumErr){
302 printf("\n");
303 for(i=0; i<64; i++){
304 if((i&7)==0) printf("\n");
305 printf("%4d ", block_org[i]);
306 }
307 for(i=0; i<64; i++){
308 if((i&7)==0) printf("\n");
309 printf("%4d ", block[i] - block1[i]);
310 }
311 }
312#endif
313 }
ae32e509 314 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
115329f1 315
86748dbc
MN
316#if 1 // dump systematic errors
317 for(i=0; i<64; i++){
bb270c08 318 if(i%8==0) printf("\n");
2029e934 319 printf("%7d ", (int)sysErr[i]);
de6d9b64 320 }
86748dbc
MN
321 printf("\n");
322#endif
115329f1 323
86748dbc 324 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 325 is_idct ? "IDCT" : "DCT",
86748dbc
MN
326 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
327#if 1 //Speed test
de6d9b64 328 /* speed test */
86748dbc
MN
329 for(i=0;i<64;i++)
330 block1[i] = 0;
331 switch(test){
115329f1 332 case 0:
86748dbc 333 for(i=0;i<64;i++)
64bde197 334 block1[i] = av_lfg_get(&prng) % 512 -256;
ad324c93 335 if (is_idct){
0de74546 336 ff_ref_fdct(block1);
ad324c93
MN
337
338 for(i=0;i<64;i++)
339 block1[i]>>=3;
340 }
86748dbc
MN
341 break;
342 case 1:{
343 case 2:
64bde197
DB
344 block1[0] = av_lfg_get(&prng) % 512 -256;
345 block1[1] = av_lfg_get(&prng) % 512 -256;
346 block1[2] = av_lfg_get(&prng) % 512 -256;
347 block1[3] = av_lfg_get(&prng) % 512 -256;
86748dbc
MN
348 }break;
349 }
de6d9b64 350
3ac35bdb 351 if (form == MMX_PERM) {
86748dbc 352 for(i=0;i<64;i++)
9e1586fc 353 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 354 } else if(form == MMX_SIMPLE_PERM) {
86748dbc
MN
355 for(i=0;i<64;i++)
356 block[idct_simple_mmx_perm[i]] = block1[i];
357 } else {
358 for(i=0; i<64; i++)
359 block[i]= block1[i];
9e1586fc
FB
360 }
361
de6d9b64
FB
362 ti = gettime();
363 it1 = 0;
364 do {
365 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
366 for(i=0; i<64; i++)
367 block[i]= block1[i];
368// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 369// do not memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
370 fdct_func(block);
371 }
372 it1 += NB_ITS_SPEED;
373 ti1 = gettime() - ti;
374 } while (ti1 < 1000000);
aadd27cd 375 mmx_emms();
de6d9b64 376
86748dbc 377 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 378 is_idct ? "IDCT" : "DCT",
de6d9b64 379 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 380#endif
de6d9b64
FB
381}
382
c6727809
MR
383DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
384DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
a46a3ce4 385
504ffed1 386static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
387{
388 static int init;
389 static double c8[8][8];
390 static double c4[4][4];
391 double block1[64], block2[64], block3[64];
392 double s, sum, v;
393 int i, j, k;
394
395 if (!init) {
396 init = 1;
397
398 for(i=0;i<8;i++) {
399 sum = 0;
400 for(j=0;j<8;j++) {
401 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
402 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
403 sum += c8[i][j] * c8[i][j];
404 }
405 }
115329f1 406
a46a3ce4
FB
407 for(i=0;i<4;i++) {
408 sum = 0;
409 for(j=0;j<4;j++) {
410 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
411 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
412 sum += c4[i][j] * c4[i][j];
413 }
414 }
415 }
416
417 /* butterfly */
652f0197 418 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
419 for(i=0;i<4;i++) {
420 for(j=0;j<8;j++) {
652f0197
FB
421 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
422 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
423 }
424 }
425
426 /* idct8 on lines */
427 for(i=0;i<8;i++) {
428 for(j=0;j<8;j++) {
429 sum = 0;
430 for(k=0;k<8;k++)
431 sum += c8[k][j] * block1[8*i+k];
432 block2[8*i+j] = sum;
433 }
434 }
435
436 /* idct4 */
437 for(i=0;i<8;i++) {
438 for(j=0;j<4;j++) {
439 /* top */
440 sum = 0;
441 for(k=0;k<4;k++)
442 sum += c4[k][j] * block2[8*(2*k)+i];
443 block3[8*(2*j)+i] = sum;
444
445 /* bottom */
446 sum = 0;
447 for(k=0;k<4;k++)
448 sum += c4[k][j] * block2[8*(2*k+1)+i];
449 block3[8*(2*j+1)+i] = sum;
450 }
451 }
452
453 /* clamp and store the result */
454 for(i=0;i<8;i++) {
455 for(j=0;j<8;j++) {
652f0197 456 v = block3[8*i+j];
a46a3ce4
FB
457 if (v < 0)
458 v = 0;
459 else if (v > 255)
460 v = 255;
461 dest[i * linesize + j] = (int)rint(v);
462 }
463 }
464}
465
504ffed1 466static void idct248_error(const char *name,
0c1a9eda 467 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
468{
469 int it, i, it1, ti, ti1, err_max, v;
470
64bde197 471 AVLFG prng;
294eaa26 472
64bde197 473 av_lfg_init(&prng, 1);
115329f1 474
a46a3ce4
FB
475 /* just one test to see if code is correct (precision is less
476 important here) */
477 err_max = 0;
478 for(it=0;it<NB_ITS;it++) {
115329f1 479
652f0197
FB
480 /* XXX: use forward transform to generate values */
481 for(i=0;i<64;i++)
64bde197 482 block1[i] = av_lfg_get(&prng) % 256 - 128;
652f0197
FB
483 block1[0] += 1024;
484
a46a3ce4
FB
485 for(i=0; i<64; i++)
486 block[i]= block1[i];
487 idct248_ref(img_dest1, 8, block);
115329f1 488
652f0197
FB
489 for(i=0; i<64; i++)
490 block[i]= block1[i];
491 idct248_put(img_dest, 8, block);
115329f1 492
652f0197
FB
493 for(i=0;i<64;i++) {
494 v = abs((int)img_dest[i] - (int)img_dest1[i]);
495 if (v == 255)
496 printf("%d %d\n", img_dest[i], img_dest1[i]);
497 if (v > err_max)
498 err_max = v;
499 }
a46a3ce4
FB
500#if 0
501 printf("ref=\n");
502 for(i=0;i<8;i++) {
503 int j;
504 for(j=0;j<8;j++) {
505 printf(" %3d", img_dest1[i*8+j]);
506 }
507 printf("\n");
508 }
115329f1 509
a46a3ce4
FB
510 printf("out=\n");
511 for(i=0;i<8;i++) {
512 int j;
513 for(j=0;j<8;j++) {
514 printf(" %3d", img_dest[i*8+j]);
515 }
516 printf("\n");
517 }
518#endif
a46a3ce4
FB
519 }
520 printf("%s %s: err_inf=%d\n",
521 1 ? "IDCT248" : "DCT248",
522 name, err_max);
523
524 ti = gettime();
525 it1 = 0;
526 do {
527 for(it=0;it<NB_ITS_SPEED;it++) {
528 for(i=0; i<64; i++)
529 block[i]= block1[i];
530// memcpy(block, block1, sizeof(DCTELEM) * 64);
755bfeab 531// do not memcpy especially not fastmemcpy because it does movntq !!!
a46a3ce4
FB
532 idct248_put(img_dest, 8, block);
533 }
534 it1 += NB_ITS_SPEED;
535 ti1 = gettime() - ti;
536 } while (ti1 < 1000000);
aadd27cd 537 mmx_emms();
a46a3ce4
FB
538
539 printf("%s %s: %0.1f kdct/s\n",
540 1 ? "IDCT248" : "DCT248",
541 name, (double)it1 * 1000.0 / (double)ti1);
542}
543
/**
 * Print the command-line usage summary to standard output.
 */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from mpeg4 std\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n";

    printf("%s", usage);
}
553
de6d9b64
FB
554int main(int argc, char **argv)
555{
a46a3ce4 556 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
557 int c,i;
558 int test=1;
aadd27cd 559 cpu_flags = mm_support();
9e1586fc 560
0de74546 561 ff_ref_dct_init();
9e1586fc 562 idct_mmx_init();
f67a10cd 563
486497e0 564 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 565 for(i=0;i<MAX_NEG_CROP;i++) {
486497e0
MR
566 cropTbl[i] = 0;
567 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 568 }
115329f1 569
9e1586fc 570 for(;;) {
a46a3ce4 571 c = getopt(argc, argv, "ih4");
9e1586fc
FB
572 if (c == -1)
573 break;
574 switch(c) {
575 case 'i':
576 test_idct = 1;
577 break;
a46a3ce4
FB
578 case '4':
579 test_248_dct = 1;
580 break;
86748dbc 581 default :
9e1586fc
FB
582 case 'h':
583 help();
c6bdc908 584 return 0;
9e1586fc
FB
585 }
586 }
115329f1 587
86748dbc 588 if(optind <argc) test= atoi(argv[optind]);
115329f1 589
9e1586fc
FB
590 printf("ffmpeg DCT/IDCT test\n");
591
a46a3ce4 592 if (test_248_dct) {
59e6f60a 593 idct248_error("SIMPLE-C", ff_simple_idct248_put);
9e1586fc 594 } else {
3ac35bdb 595 for (i=0;algos[i].name;i++)
dafe8824 596 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
3ac35bdb 597 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
a46a3ce4 598 }
9e1586fc 599 }
de6d9b64
FB
600 return 0;
601}