Disable B-frames decoding until they produce correct picture
[libav.git] / libavcodec / dct-test.c
CommitLineData
983e3246
MN
1/**
2 * @file dct-test.c
115329f1 3 * DCT test. (c) 2001 Fabrice Bellard.
983e3246
MN
4 * Started from sample code by Juan J. Sierralta P.
5 */
6
de6d9b64
FB
7#include <stdlib.h>
8#include <stdio.h>
9#include <string.h>
10#include <sys/time.h>
11#include <unistd.h>
12
13#include "dsputil.h"
14
86748dbc 15#include "simple_idct.h"
65e4c8c9 16#include "faandct.h"
9e1586fc 17
e366e679
FB
18#ifndef MAX
19#define MAX(a, b) (((a) > (b)) ? (a) : (b))
20#endif
21
434df899
MN
22#undef printf
23
/**
 * Plain-memcpy stand-in for the optimized fast_memcpy symbol.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  the destination pointer, exactly as memcpy() returns it
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
25
9e1586fc 26/* reference fdct/idct */
de6d9b64 27extern void fdct(DCTELEM *block);
9e1586fc 28extern void idct(DCTELEM *block);
434df899
MN
29extern void ff_idct_xvid_mmx(DCTELEM *block);
30extern void ff_idct_xvid_mmx2(DCTELEM *block);
de6d9b64
FB
31extern void init_fdct();
32
9e1586fc
FB
33extern void j_rev_dct(DCTELEM *data);
34extern void ff_mmx_idct(DCTELEM *data);
35extern void ff_mmxext_idct(DCTELEM *data);
36
86748dbc
MN
37extern void odivx_idct_c (short *block);
38
de6d9b64
FB
39#define AANSCALE_BITS 12
/* Per-coefficient scale factors, precomputed and scaled up by 14 bits
 * (one row of the 8x8 block per line). */
static const unsigned short aanscales[64] = {
    /* row 0 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 1 */ 22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    /* row 2 */ 21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    /* row 3 */ 19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    /* row 4 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 5 */ 12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    /* row 6 */  8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    /* row 7 */  4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
51
0c1a9eda 52uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 53
/**
 * Read the current wall-clock time via gettimeofday().
 *
 * @return seconds * 1000000 + microseconds, as a 64-bit integer
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen before multiplying so the seconds part cannot overflow */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
60
61#define NB_ITS 20000
62#define NB_ITS_SPEED 50000
63
9e1586fc
FB
64static short idct_mmx_perm[64];
65
/* Input permutation applied before calling ff_simple_idct_mmx:
 * coefficient i of the natural-order block is stored at index
 * idct_simple_mmx_perm[i]. */
static short idct_simple_mmx_perm[64] = {
     0,  8,  4,  9,  1, 12,  5, 13,
    16, 24, 20, 25, 17, 28, 21, 29,
    32, 40, 36, 41, 33, 44, 37, 45,
    18, 26, 22, 27, 19, 30, 23, 31,
     2, 10,  6, 11,  3, 14,  7, 15,
    48, 56, 52, 57, 49, 60, 53, 61,
    34, 42, 38, 43, 35, 46, 39, 47,
    50, 58, 54, 59, 51, 62, 55, 63,
};
76
9e1586fc
FB
77void idct_mmx_init(void)
78{
79 int i;
80
81 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
82 for (i = 0; i < 64; i++) {
bb270c08
DB
83 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
84// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
85 }
86}
87
88static DCTELEM block[64] __attribute__ ((aligned (8)));
89static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 90static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc
FB
91
92void dct_error(const char *name, int is_idct,
93 void (*fdct_func)(DCTELEM *block),
86748dbc 94 void (*fdct_ref)(DCTELEM *block), int test)
de6d9b64
FB
95{
96 int it, i, scale;
de6d9b64 97 int err_inf, v;
0c1a9eda
ZK
98 int64_t err2, ti, ti1, it1;
99 int64_t sysErr[64], sysErrMax=0;
86748dbc 100 int maxout=0;
86748dbc 101 int blockSumErrMax=0, blockSumErr;
de6d9b64
FB
102
103 srandom(0);
104
105 err_inf = 0;
106 err2 = 0;
86748dbc 107 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 108 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
109 for(i=0;i<64;i++)
110 block1[i] = 0;
111 switch(test){
115329f1 112 case 0:
86748dbc
MN
113 for(i=0;i<64;i++)
114 block1[i] = (random() % 512) -256;
ad324c93 115 if (is_idct){
86748dbc 116 fdct(block1);
ad324c93
MN
117
118 for(i=0;i<64;i++)
119 block1[i]>>=3;
120 }
86748dbc
MN
121 break;
122 case 1:{
123 int num= (random()%10)+1;
124 for(i=0;i<num;i++)
125 block1[random()%64] = (random() % 512) -256;
126 }break;
127 case 2:
128 block1[0]= (random()%4096)-2048;
129 block1[63]= (block1[0]&1)^1;
130 break;
131 }
9e1586fc 132
86748dbc
MN
133#if 0 // simulate mismatch control
134{ int sum=0;
135 for(i=0;i<64;i++)
136 sum+=block1[i];
137
115329f1 138 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
139}
140#endif
141
142 for(i=0; i<64; i++)
143 block_org[i]= block1[i];
9e1586fc
FB
144
145 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
146 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
147 for(i=0;i<64;i++)
9e1586fc 148 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 149 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
150 for(i=0;i<64;i++)
151 block[idct_simple_mmx_perm[i]] = block1[i];
152
bb270c08 153 } else {
86748dbc
MN
154 for(i=0; i<64; i++)
155 block[i]= block1[i];
9e1586fc 156 }
86748dbc
MN
157#if 0 // simulate mismatch control for tested IDCT but not the ref
158{ int sum=0;
159 for(i=0;i<64;i++)
160 sum+=block[i];
161
115329f1 162 if((sum&1)==0) block[63]^=1;
86748dbc
MN
163}
164#endif
9e1586fc 165
de6d9b64 166 fdct_func(block);
9e1586fc
FB
167 emms(); /* for ff_mmx_idct */
168
115329f1
DB
169 if (fdct_func == fdct_ifast
170#ifndef FAAN_POSTSCALE
b4c3816c
MN
171 || fdct_func == ff_faandct
172#endif
173 ) {
de6d9b64 174 for(i=0; i<64; i++) {
ad324c93 175 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
86748dbc
MN
176 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
177 }
178 }
179
9e1586fc 180 fdct_ref(block1);
de6d9b64 181
86748dbc 182 blockSumErr=0;
de6d9b64
FB
183 for(i=0;i<64;i++) {
184 v = abs(block[i] - block1[i]);
185 if (v > err_inf)
186 err_inf = v;
187 err2 += v * v;
bb270c08
DB
188 sysErr[i] += block[i] - block1[i];
189 blockSumErr += v;
190 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 191 }
86748dbc
MN
192 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
193#if 0 // print different matrix pairs
194 if(blockSumErr){
195 printf("\n");
196 for(i=0; i<64; i++){
197 if((i&7)==0) printf("\n");
198 printf("%4d ", block_org[i]);
199 }
200 for(i=0; i<64; i++){
201 if((i&7)==0) printf("\n");
202 printf("%4d ", block[i] - block1[i]);
203 }
204 }
205#endif
206 }
207 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i]));
115329f1 208
86748dbc
MN
209#if 1 // dump systematic errors
210 for(i=0; i<64; i++){
bb270c08 211 if(i%8==0) printf("\n");
86748dbc 212 printf("%5d ", (int)sysErr[i]);
de6d9b64 213 }
86748dbc
MN
214 printf("\n");
215#endif
115329f1 216
86748dbc 217 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 218 is_idct ? "IDCT" : "DCT",
86748dbc
MN
219 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
220#if 1 //Speed test
de6d9b64 221 /* speed test */
86748dbc
MN
222 for(i=0;i<64;i++)
223 block1[i] = 0;
224 switch(test){
115329f1 225 case 0:
86748dbc
MN
226 for(i=0;i<64;i++)
227 block1[i] = (random() % 512) -256;
ad324c93 228 if (is_idct){
86748dbc 229 fdct(block1);
ad324c93
MN
230
231 for(i=0;i<64;i++)
232 block1[i]>>=3;
233 }
86748dbc
MN
234 break;
235 case 1:{
236 case 2:
237 block1[0] = (random() % 512) -256;
238 block1[1] = (random() % 512) -256;
239 block1[2] = (random() % 512) -256;
240 block1[3] = (random() % 512) -256;
241 }break;
242 }
de6d9b64 243
9e1586fc 244 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
245 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
246 for(i=0;i<64;i++)
9e1586fc 247 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 248 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
249 for(i=0;i<64;i++)
250 block[idct_simple_mmx_perm[i]] = block1[i];
251 } else {
252 for(i=0; i<64; i++)
253 block[i]= block1[i];
9e1586fc
FB
254 }
255
de6d9b64
FB
256 ti = gettime();
257 it1 = 0;
258 do {
259 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
260 for(i=0; i<64; i++)
261 block[i]= block1[i];
262// memcpy(block, block1, sizeof(DCTELEM) * 64);
263// dont memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
264 fdct_func(block);
265 }
266 it1 += NB_ITS_SPEED;
267 ti1 = gettime() - ti;
268 } while (ti1 < 1000000);
9e1586fc 269 emms();
de6d9b64 270
86748dbc 271 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 272 is_idct ? "IDCT" : "DCT",
de6d9b64 273 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 274#endif
de6d9b64
FB
275}
276
0c1a9eda
ZK
277static uint8_t img_dest[64] __attribute__ ((aligned (8)));
278static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
a46a3ce4 279
0c1a9eda 280void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
281{
282 static int init;
283 static double c8[8][8];
284 static double c4[4][4];
285 double block1[64], block2[64], block3[64];
286 double s, sum, v;
287 int i, j, k;
288
289 if (!init) {
290 init = 1;
291
292 for(i=0;i<8;i++) {
293 sum = 0;
294 for(j=0;j<8;j++) {
295 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
296 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
297 sum += c8[i][j] * c8[i][j];
298 }
299 }
115329f1 300
a46a3ce4
FB
301 for(i=0;i<4;i++) {
302 sum = 0;
303 for(j=0;j<4;j++) {
304 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
305 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
306 sum += c4[i][j] * c4[i][j];
307 }
308 }
309 }
310
311 /* butterfly */
652f0197 312 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
313 for(i=0;i<4;i++) {
314 for(j=0;j<8;j++) {
652f0197
FB
315 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
316 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
317 }
318 }
319
320 /* idct8 on lines */
321 for(i=0;i<8;i++) {
322 for(j=0;j<8;j++) {
323 sum = 0;
324 for(k=0;k<8;k++)
325 sum += c8[k][j] * block1[8*i+k];
326 block2[8*i+j] = sum;
327 }
328 }
329
330 /* idct4 */
331 for(i=0;i<8;i++) {
332 for(j=0;j<4;j++) {
333 /* top */
334 sum = 0;
335 for(k=0;k<4;k++)
336 sum += c4[k][j] * block2[8*(2*k)+i];
337 block3[8*(2*j)+i] = sum;
338
339 /* bottom */
340 sum = 0;
341 for(k=0;k<4;k++)
342 sum += c4[k][j] * block2[8*(2*k+1)+i];
343 block3[8*(2*j+1)+i] = sum;
344 }
345 }
346
347 /* clamp and store the result */
348 for(i=0;i<8;i++) {
349 for(j=0;j<8;j++) {
652f0197 350 v = block3[8*i+j];
a46a3ce4
FB
351 if (v < 0)
352 v = 0;
353 else if (v > 255)
354 v = 255;
355 dest[i * linesize + j] = (int)rint(v);
356 }
357 }
358}
359
115329f1 360void idct248_error(const char *name,
0c1a9eda 361 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
362{
363 int it, i, it1, ti, ti1, err_max, v;
364
365 srandom(0);
115329f1 366
a46a3ce4
FB
367 /* just one test to see if code is correct (precision is less
368 important here) */
369 err_max = 0;
370 for(it=0;it<NB_ITS;it++) {
115329f1 371
652f0197
FB
372 /* XXX: use forward transform to generate values */
373 for(i=0;i<64;i++)
374 block1[i] = (random() % 256) - 128;
375 block1[0] += 1024;
376
a46a3ce4
FB
377 for(i=0; i<64; i++)
378 block[i]= block1[i];
379 idct248_ref(img_dest1, 8, block);
115329f1 380
652f0197
FB
381 for(i=0; i<64; i++)
382 block[i]= block1[i];
383 idct248_put(img_dest, 8, block);
115329f1 384
652f0197
FB
385 for(i=0;i<64;i++) {
386 v = abs((int)img_dest[i] - (int)img_dest1[i]);
387 if (v == 255)
388 printf("%d %d\n", img_dest[i], img_dest1[i]);
389 if (v > err_max)
390 err_max = v;
391 }
a46a3ce4
FB
392#if 0
393 printf("ref=\n");
394 for(i=0;i<8;i++) {
395 int j;
396 for(j=0;j<8;j++) {
397 printf(" %3d", img_dest1[i*8+j]);
398 }
399 printf("\n");
400 }
115329f1 401
a46a3ce4
FB
402 printf("out=\n");
403 for(i=0;i<8;i++) {
404 int j;
405 for(j=0;j<8;j++) {
406 printf(" %3d", img_dest[i*8+j]);
407 }
408 printf("\n");
409 }
410#endif
a46a3ce4
FB
411 }
412 printf("%s %s: err_inf=%d\n",
413 1 ? "IDCT248" : "DCT248",
414 name, err_max);
415
416 ti = gettime();
417 it1 = 0;
418 do {
419 for(it=0;it<NB_ITS_SPEED;it++) {
420 for(i=0; i<64; i++)
421 block[i]= block1[i];
422// memcpy(block, block1, sizeof(DCTELEM) * 64);
423// dont memcpy especially not fastmemcpy because it does movntq !!!
424 idct248_put(img_dest, 8, block);
425 }
426 it1 += NB_ITS_SPEED;
427 ti1 = gettime() - ti;
428 } while (ti1 < 1000000);
429 emms();
430
431 printf("%s %s: %0.1f kdct/s\n",
432 1 ? "IDCT248" : "DCT248",
433 name, (double)it1 * 1000.0 / (double)ti1);
434}
435
9e1586fc
FB
/**
 * Print the command-line usage summary to stdout and terminate the
 * process with exit status 1.
 */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
    exit(1);
}
446
de6d9b64
FB
447int main(int argc, char **argv)
448{
a46a3ce4 449 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
450 int c,i;
451 int test=1;
9e1586fc 452
de6d9b64 453 init_fdct();
9e1586fc 454 idct_mmx_init();
de6d9b64 455
86748dbc
MN
456 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
457 for(i=0;i<MAX_NEG_CROP;i++) {
458 cropTbl[i] = 0;
459 cropTbl[i + MAX_NEG_CROP + 256] = 255;
460 }
115329f1 461
9e1586fc 462 for(;;) {
a46a3ce4 463 c = getopt(argc, argv, "ih4");
9e1586fc
FB
464 if (c == -1)
465 break;
466 switch(c) {
467 case 'i':
468 test_idct = 1;
469 break;
a46a3ce4
FB
470 case '4':
471 test_248_dct = 1;
472 break;
86748dbc 473 default :
9e1586fc
FB
474 case 'h':
475 help();
476 break;
477 }
478 }
115329f1 479
86748dbc 480 if(optind <argc) test= atoi(argv[optind]);
115329f1 481
9e1586fc
FB
482 printf("ffmpeg DCT/IDCT test\n");
483
a46a3ce4
FB
484 if (test_248_dct) {
485 idct248_error("SIMPLE-C", simple_idct248_put);
9e1586fc 486 } else {
a46a3ce4
FB
487 if (!test_idct) {
488 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
489 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
490 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
491 dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
94789b9e 492 dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
65e4c8c9 493 dct_error("FAAN", 0, ff_faandct, fdct, test);
a46a3ce4
FB
494 } else {
495 dct_error("REF-DBL", 1, idct, idct, test);
496 dct_error("INT", 1, j_rev_dct, idct, test);
497 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
498 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
499 dct_error("SIMPLE-C", 1, simple_idct, idct, test);
500 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
434df899
MN
501 dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
502 dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
a46a3ce4
FB
503 // dct_error("ODIVX-C", 1, odivx_idct_c, idct);
504 //printf(" test against odivx idct\n");
bb270c08 505 // dct_error("REF", 1, idct, odivx_idct_c);
a46a3ce4
FB
506 // dct_error("INT", 1, j_rev_dct, odivx_idct_c);
507 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
508 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
509 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c);
510 // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c);
511 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c);
512 }
9e1586fc 513 }
de6d9b64
FB
514 return 0;
515}