Reportedly FFmpeg fails to compile on Cygwin with vhook enabled, but FFserver
[libav.git] / libavcodec / dct-test.c
CommitLineData
983e3246
MN
1/**
2 * @file dct-test.c
115329f1 3 * DCT test. (c) 2001 Fabrice Bellard.
983e3246
MN
4 * Started from sample code by Juan J. Sierralta P.
5 */
6
de6d9b64
FB
7#include <stdlib.h>
8#include <stdio.h>
9#include <string.h>
10#include <sys/time.h>
11#include <unistd.h>
12
13#include "dsputil.h"
14
9e1586fc 15#include "i386/mmx.h"
86748dbc 16#include "simple_idct.h"
65e4c8c9 17#include "faandct.h"
9e1586fc 18
e366e679
FB
19#ifndef MAX
20#define MAX(a, b) (((a) > (b)) ? (a) : (b))
21#endif
22
434df899
MN
23#undef printf
24
/**
 * Plain implementation of fast_memcpy() that forwards to the C library
 * memcpy().
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return a (the value returned by memcpy())
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
26
9e1586fc 27/* reference fdct/idct */
de6d9b64 28extern void fdct(DCTELEM *block);
9e1586fc 29extern void idct(DCTELEM *block);
434df899
MN
30extern void ff_idct_xvid_mmx(DCTELEM *block);
31extern void ff_idct_xvid_mmx2(DCTELEM *block);
de6d9b64
FB
32extern void init_fdct();
33
9e1586fc
FB
34extern void j_rev_dct(DCTELEM *data);
35extern void ff_mmx_idct(DCTELEM *data);
36extern void ff_mmxext_idct(DCTELEM *data);
37
86748dbc
MN
38extern void odivx_idct_c (short *block);
39
de6d9b64
FB
40#define AANSCALE_BITS 12
41static const unsigned short aanscales[64] = {
42 /* precomputed values scaled up by 14 bits */
43 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
44 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
45 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
46 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
47 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
48 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
49 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
50 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
51};
52
0c1a9eda 53uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 54
/**
 * Read the current time through gettimeofday().
 *
 * @return seconds * 1000000 + microseconds, as a 64-bit value
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen before multiplying so the product is computed in 64 bits */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
61
62#define NB_ITS 20000
63#define NB_ITS_SPEED 50000
64
9e1586fc
FB
65static short idct_mmx_perm[64];
66
86748dbc 67static short idct_simple_mmx_perm[64]={
115329f1
DB
68 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
69 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
70 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
71 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
72 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
73 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
74 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
86748dbc
MN
75 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
76};
77
9e1586fc
FB
78void idct_mmx_init(void)
79{
80 int i;
81
82 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
83 for (i = 0; i < 64; i++) {
84 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
86748dbc 85// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
86 }
87}
88
89static DCTELEM block[64] __attribute__ ((aligned (8)));
90static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 91static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc
FB
92
93void dct_error(const char *name, int is_idct,
94 void (*fdct_func)(DCTELEM *block),
86748dbc 95 void (*fdct_ref)(DCTELEM *block), int test)
de6d9b64
FB
96{
97 int it, i, scale;
de6d9b64 98 int err_inf, v;
0c1a9eda
ZK
99 int64_t err2, ti, ti1, it1;
100 int64_t sysErr[64], sysErrMax=0;
86748dbc 101 int maxout=0;
86748dbc 102 int blockSumErrMax=0, blockSumErr;
de6d9b64
FB
103
104 srandom(0);
105
106 err_inf = 0;
107 err2 = 0;
86748dbc 108 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 109 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
110 for(i=0;i<64;i++)
111 block1[i] = 0;
112 switch(test){
115329f1 113 case 0:
86748dbc
MN
114 for(i=0;i<64;i++)
115 block1[i] = (random() % 512) -256;
ad324c93 116 if (is_idct){
86748dbc 117 fdct(block1);
ad324c93
MN
118
119 for(i=0;i<64;i++)
120 block1[i]>>=3;
121 }
86748dbc
MN
122 break;
123 case 1:{
124 int num= (random()%10)+1;
125 for(i=0;i<num;i++)
126 block1[random()%64] = (random() % 512) -256;
127 }break;
128 case 2:
129 block1[0]= (random()%4096)-2048;
130 block1[63]= (block1[0]&1)^1;
131 break;
132 }
9e1586fc 133
86748dbc
MN
134#if 0 // simulate mismatch control
135{ int sum=0;
136 for(i=0;i<64;i++)
137 sum+=block1[i];
138
115329f1 139 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
140}
141#endif
142
143 for(i=0; i<64; i++)
144 block_org[i]= block1[i];
9e1586fc
FB
145
146 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
147 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
148 for(i=0;i<64;i++)
9e1586fc 149 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 150 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
151 for(i=0;i<64;i++)
152 block[idct_simple_mmx_perm[i]] = block1[i];
153
154 } else {
155 for(i=0; i<64; i++)
156 block[i]= block1[i];
9e1586fc 157 }
86748dbc
MN
158#if 0 // simulate mismatch control for tested IDCT but not the ref
159{ int sum=0;
160 for(i=0;i<64;i++)
161 sum+=block[i];
162
115329f1 163 if((sum&1)==0) block[63]^=1;
86748dbc
MN
164}
165#endif
9e1586fc 166
de6d9b64 167 fdct_func(block);
9e1586fc
FB
168 emms(); /* for ff_mmx_idct */
169
115329f1
DB
170 if (fdct_func == fdct_ifast
171#ifndef FAAN_POSTSCALE
b4c3816c
MN
172 || fdct_func == ff_faandct
173#endif
174 ) {
de6d9b64 175 for(i=0; i<64; i++) {
ad324c93 176 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
86748dbc
MN
177 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
178 }
179 }
180
9e1586fc 181 fdct_ref(block1);
de6d9b64 182
86748dbc 183 blockSumErr=0;
de6d9b64
FB
184 for(i=0;i<64;i++) {
185 v = abs(block[i] - block1[i]);
186 if (v > err_inf)
187 err_inf = v;
188 err2 += v * v;
86748dbc
MN
189 sysErr[i] += block[i] - block1[i];
190 blockSumErr += v;
191 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 192 }
86748dbc
MN
193 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
194#if 0 // print different matrix pairs
195 if(blockSumErr){
196 printf("\n");
197 for(i=0; i<64; i++){
198 if((i&7)==0) printf("\n");
199 printf("%4d ", block_org[i]);
200 }
201 for(i=0; i<64; i++){
202 if((i&7)==0) printf("\n");
203 printf("%4d ", block[i] - block1[i]);
204 }
205 }
206#endif
207 }
208 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i]));
115329f1 209
86748dbc
MN
210#if 1 // dump systematic errors
211 for(i=0; i<64; i++){
212 if(i%8==0) printf("\n");
213 printf("%5d ", (int)sysErr[i]);
de6d9b64 214 }
86748dbc
MN
215 printf("\n");
216#endif
115329f1 217
86748dbc 218 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 219 is_idct ? "IDCT" : "DCT",
86748dbc
MN
220 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
221#if 1 //Speed test
de6d9b64 222 /* speed test */
86748dbc
MN
223 for(i=0;i<64;i++)
224 block1[i] = 0;
225 switch(test){
115329f1 226 case 0:
86748dbc
MN
227 for(i=0;i<64;i++)
228 block1[i] = (random() % 512) -256;
ad324c93 229 if (is_idct){
86748dbc 230 fdct(block1);
ad324c93
MN
231
232 for(i=0;i<64;i++)
233 block1[i]>>=3;
234 }
86748dbc
MN
235 break;
236 case 1:{
237 case 2:
238 block1[0] = (random() % 512) -256;
239 block1[1] = (random() % 512) -256;
240 block1[2] = (random() % 512) -256;
241 block1[3] = (random() % 512) -256;
242 }break;
243 }
de6d9b64 244
9e1586fc 245 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
246 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
247 for(i=0;i<64;i++)
9e1586fc 248 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 249 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
250 for(i=0;i<64;i++)
251 block[idct_simple_mmx_perm[i]] = block1[i];
252 } else {
253 for(i=0; i<64; i++)
254 block[i]= block1[i];
9e1586fc
FB
255 }
256
de6d9b64
FB
257 ti = gettime();
258 it1 = 0;
259 do {
260 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
261 for(i=0; i<64; i++)
262 block[i]= block1[i];
263// memcpy(block, block1, sizeof(DCTELEM) * 64);
264// dont memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
265 fdct_func(block);
266 }
267 it1 += NB_ITS_SPEED;
268 ti1 = gettime() - ti;
269 } while (ti1 < 1000000);
9e1586fc 270 emms();
de6d9b64 271
86748dbc 272 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 273 is_idct ? "IDCT" : "DCT",
de6d9b64 274 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 275#endif
de6d9b64
FB
276}
277
0c1a9eda
ZK
278static uint8_t img_dest[64] __attribute__ ((aligned (8)));
279static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
a46a3ce4 280
0c1a9eda 281void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
282{
283 static int init;
284 static double c8[8][8];
285 static double c4[4][4];
286 double block1[64], block2[64], block3[64];
287 double s, sum, v;
288 int i, j, k;
289
290 if (!init) {
291 init = 1;
292
293 for(i=0;i<8;i++) {
294 sum = 0;
295 for(j=0;j<8;j++) {
296 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
297 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
298 sum += c8[i][j] * c8[i][j];
299 }
300 }
115329f1 301
a46a3ce4
FB
302 for(i=0;i<4;i++) {
303 sum = 0;
304 for(j=0;j<4;j++) {
305 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
306 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
307 sum += c4[i][j] * c4[i][j];
308 }
309 }
310 }
311
312 /* butterfly */
652f0197 313 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
314 for(i=0;i<4;i++) {
315 for(j=0;j<8;j++) {
652f0197
FB
316 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
317 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
318 }
319 }
320
321 /* idct8 on lines */
322 for(i=0;i<8;i++) {
323 for(j=0;j<8;j++) {
324 sum = 0;
325 for(k=0;k<8;k++)
326 sum += c8[k][j] * block1[8*i+k];
327 block2[8*i+j] = sum;
328 }
329 }
330
331 /* idct4 */
332 for(i=0;i<8;i++) {
333 for(j=0;j<4;j++) {
334 /* top */
335 sum = 0;
336 for(k=0;k<4;k++)
337 sum += c4[k][j] * block2[8*(2*k)+i];
338 block3[8*(2*j)+i] = sum;
339
340 /* bottom */
341 sum = 0;
342 for(k=0;k<4;k++)
343 sum += c4[k][j] * block2[8*(2*k+1)+i];
344 block3[8*(2*j+1)+i] = sum;
345 }
346 }
347
348 /* clamp and store the result */
349 for(i=0;i<8;i++) {
350 for(j=0;j<8;j++) {
652f0197 351 v = block3[8*i+j];
a46a3ce4
FB
352 if (v < 0)
353 v = 0;
354 else if (v > 255)
355 v = 255;
356 dest[i * linesize + j] = (int)rint(v);
357 }
358 }
359}
360
115329f1 361void idct248_error(const char *name,
0c1a9eda 362 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
363{
364 int it, i, it1, ti, ti1, err_max, v;
365
366 srandom(0);
115329f1 367
a46a3ce4
FB
368 /* just one test to see if code is correct (precision is less
369 important here) */
370 err_max = 0;
371 for(it=0;it<NB_ITS;it++) {
115329f1 372
652f0197
FB
373 /* XXX: use forward transform to generate values */
374 for(i=0;i<64;i++)
375 block1[i] = (random() % 256) - 128;
376 block1[0] += 1024;
377
a46a3ce4
FB
378 for(i=0; i<64; i++)
379 block[i]= block1[i];
380 idct248_ref(img_dest1, 8, block);
115329f1 381
652f0197
FB
382 for(i=0; i<64; i++)
383 block[i]= block1[i];
384 idct248_put(img_dest, 8, block);
115329f1 385
652f0197
FB
386 for(i=0;i<64;i++) {
387 v = abs((int)img_dest[i] - (int)img_dest1[i]);
388 if (v == 255)
389 printf("%d %d\n", img_dest[i], img_dest1[i]);
390 if (v > err_max)
391 err_max = v;
392 }
a46a3ce4
FB
393#if 0
394 printf("ref=\n");
395 for(i=0;i<8;i++) {
396 int j;
397 for(j=0;j<8;j++) {
398 printf(" %3d", img_dest1[i*8+j]);
399 }
400 printf("\n");
401 }
115329f1 402
a46a3ce4
FB
403 printf("out=\n");
404 for(i=0;i<8;i++) {
405 int j;
406 for(j=0;j<8;j++) {
407 printf(" %3d", img_dest[i*8+j]);
408 }
409 printf("\n");
410 }
411#endif
a46a3ce4
FB
412 }
413 printf("%s %s: err_inf=%d\n",
414 1 ? "IDCT248" : "DCT248",
415 name, err_max);
416
417 ti = gettime();
418 it1 = 0;
419 do {
420 for(it=0;it<NB_ITS_SPEED;it++) {
421 for(i=0; i<64; i++)
422 block[i]= block1[i];
423// memcpy(block, block1, sizeof(DCTELEM) * 64);
424// dont memcpy especially not fastmemcpy because it does movntq !!!
425 idct248_put(img_dest, 8, block);
426 }
427 it1 += NB_ITS_SPEED;
428 ti1 = gettime() - ti;
429 } while (ti1 < 1000000);
430 emms();
431
432 printf("%s %s: %0.1f kdct/s\n",
433 1 ? "IDCT248" : "DCT248",
434 name, (double)it1 * 1000.0 / (double)ti1);
435}
436
9e1586fc
FB
/**
 * Print the command line usage and terminate the program with exit
 * status 1. Does not return.
 */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
    exit(1);
}
447
de6d9b64
FB
448int main(int argc, char **argv)
449{
a46a3ce4 450 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
451 int c,i;
452 int test=1;
9e1586fc 453
de6d9b64 454 init_fdct();
9e1586fc 455 idct_mmx_init();
de6d9b64 456
86748dbc
MN
457 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
458 for(i=0;i<MAX_NEG_CROP;i++) {
459 cropTbl[i] = 0;
460 cropTbl[i + MAX_NEG_CROP + 256] = 255;
461 }
115329f1 462
9e1586fc 463 for(;;) {
a46a3ce4 464 c = getopt(argc, argv, "ih4");
9e1586fc
FB
465 if (c == -1)
466 break;
467 switch(c) {
468 case 'i':
469 test_idct = 1;
470 break;
a46a3ce4
FB
471 case '4':
472 test_248_dct = 1;
473 break;
86748dbc 474 default :
9e1586fc
FB
475 case 'h':
476 help();
477 break;
478 }
479 }
115329f1 480
86748dbc 481 if(optind <argc) test= atoi(argv[optind]);
115329f1 482
9e1586fc
FB
483 printf("ffmpeg DCT/IDCT test\n");
484
a46a3ce4
FB
485 if (test_248_dct) {
486 idct248_error("SIMPLE-C", simple_idct248_put);
9e1586fc 487 } else {
a46a3ce4
FB
488 if (!test_idct) {
489 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
490 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
491 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
492 dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
94789b9e 493 dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
65e4c8c9 494 dct_error("FAAN", 0, ff_faandct, fdct, test);
a46a3ce4
FB
495 } else {
496 dct_error("REF-DBL", 1, idct, idct, test);
497 dct_error("INT", 1, j_rev_dct, idct, test);
498 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
499 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
500 dct_error("SIMPLE-C", 1, simple_idct, idct, test);
501 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
434df899
MN
502 dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
503 dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
a46a3ce4
FB
504 // dct_error("ODIVX-C", 1, odivx_idct_c, idct);
505 //printf(" test against odivx idct\n");
506 // dct_error("REF", 1, idct, odivx_idct_c);
507 // dct_error("INT", 1, j_rev_dct, odivx_idct_c);
508 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
509 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
510 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c);
511 // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c);
512 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c);
513 }
9e1586fc 514 }
de6d9b64
FB
515 return 0;
516}