() 10l
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
1/*
2 * (c) 2001 Fabrice Bellard
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
983e3246
MN
19/**
20 * @file dct-test.c
115329f1 21 * DCT test. (c) 2001 Fabrice Bellard.
983e3246
MN
22 * Started from sample code by Juan J. Sierralta P.
23 */
24
de6d9b64
FB
25#include <stdlib.h>
26#include <stdio.h>
27#include <string.h>
28#include <sys/time.h>
29#include <unistd.h>
30
31#include "dsputil.h"
32
86748dbc 33#include "simple_idct.h"
65e4c8c9 34#include "faandct.h"
9e1586fc 35
e366e679
FB
36#ifndef MAX
37#define MAX(a, b) (((a) > (b)) ? (a) : (b))
38#endif
39
434df899
MN
40#undef printf
41
42void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
43
9e1586fc 44/* reference fdct/idct */
de6d9b64 45extern void fdct(DCTELEM *block);
9e1586fc 46extern void idct(DCTELEM *block);
434df899
MN
47extern void ff_idct_xvid_mmx(DCTELEM *block);
48extern void ff_idct_xvid_mmx2(DCTELEM *block);
de6d9b64
FB
49extern void init_fdct();
50
9e1586fc
FB
51extern void j_rev_dct(DCTELEM *data);
52extern void ff_mmx_idct(DCTELEM *data);
53extern void ff_mmxext_idct(DCTELEM *data);
54
86748dbc
MN
55extern void odivx_idct_c (short *block);
56
de6d9b64
FB
57#define AANSCALE_BITS 12
58static const unsigned short aanscales[64] = {
59 /* precomputed values scaled up by 14 bits */
60 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
61 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
62 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
63 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
64 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
65 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
66 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
67 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
68};
69
0c1a9eda 70uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 71
0c1a9eda 72int64_t gettime(void)
de6d9b64
FB
73{
74 struct timeval tv;
75 gettimeofday(&tv,NULL);
0c1a9eda 76 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
de6d9b64
FB
77}
78
79#define NB_ITS 20000
80#define NB_ITS_SPEED 50000
81
9e1586fc
FB
82static short idct_mmx_perm[64];
83
86748dbc 84static short idct_simple_mmx_perm[64]={
bb270c08
DB
85 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
86 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
87 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
88 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
89 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
90 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
91 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
92 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
86748dbc
MN
93};
94
9e1586fc
FB
95void idct_mmx_init(void)
96{
97 int i;
98
99 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
100 for (i = 0; i < 64; i++) {
bb270c08
DB
101 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
102// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
103 }
104}
105
106static DCTELEM block[64] __attribute__ ((aligned (8)));
107static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 108static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc
FB
109
110void dct_error(const char *name, int is_idct,
111 void (*fdct_func)(DCTELEM *block),
86748dbc 112 void (*fdct_ref)(DCTELEM *block), int test)
de6d9b64
FB
113{
114 int it, i, scale;
de6d9b64 115 int err_inf, v;
0c1a9eda
ZK
116 int64_t err2, ti, ti1, it1;
117 int64_t sysErr[64], sysErrMax=0;
86748dbc 118 int maxout=0;
86748dbc 119 int blockSumErrMax=0, blockSumErr;
de6d9b64
FB
120
121 srandom(0);
122
123 err_inf = 0;
124 err2 = 0;
86748dbc 125 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 126 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
127 for(i=0;i<64;i++)
128 block1[i] = 0;
129 switch(test){
115329f1 130 case 0:
86748dbc
MN
131 for(i=0;i<64;i++)
132 block1[i] = (random() % 512) -256;
ad324c93 133 if (is_idct){
86748dbc 134 fdct(block1);
ad324c93
MN
135
136 for(i=0;i<64;i++)
137 block1[i]>>=3;
138 }
86748dbc
MN
139 break;
140 case 1:{
141 int num= (random()%10)+1;
142 for(i=0;i<num;i++)
143 block1[random()%64] = (random() % 512) -256;
144 }break;
145 case 2:
146 block1[0]= (random()%4096)-2048;
147 block1[63]= (block1[0]&1)^1;
148 break;
149 }
9e1586fc 150
86748dbc
MN
151#if 0 // simulate mismatch control
152{ int sum=0;
153 for(i=0;i<64;i++)
154 sum+=block1[i];
155
115329f1 156 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
157}
158#endif
159
160 for(i=0; i<64; i++)
161 block_org[i]= block1[i];
9e1586fc
FB
162
163 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
164 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
165 for(i=0;i<64;i++)
9e1586fc 166 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 167 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
168 for(i=0;i<64;i++)
169 block[idct_simple_mmx_perm[i]] = block1[i];
170
bb270c08 171 } else {
86748dbc
MN
172 for(i=0; i<64; i++)
173 block[i]= block1[i];
9e1586fc 174 }
86748dbc
MN
175#if 0 // simulate mismatch control for tested IDCT but not the ref
176{ int sum=0;
177 for(i=0;i<64;i++)
178 sum+=block[i];
179
115329f1 180 if((sum&1)==0) block[63]^=1;
86748dbc
MN
181}
182#endif
9e1586fc 183
de6d9b64 184 fdct_func(block);
9e1586fc
FB
185 emms(); /* for ff_mmx_idct */
186
115329f1
DB
187 if (fdct_func == fdct_ifast
188#ifndef FAAN_POSTSCALE
b4c3816c
MN
189 || fdct_func == ff_faandct
190#endif
191 ) {
de6d9b64 192 for(i=0; i<64; i++) {
ad324c93 193 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
86748dbc
MN
194 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
195 }
196 }
197
9e1586fc 198 fdct_ref(block1);
de6d9b64 199
86748dbc 200 blockSumErr=0;
de6d9b64
FB
201 for(i=0;i<64;i++) {
202 v = abs(block[i] - block1[i]);
203 if (v > err_inf)
204 err_inf = v;
205 err2 += v * v;
bb270c08
DB
206 sysErr[i] += block[i] - block1[i];
207 blockSumErr += v;
208 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 209 }
86748dbc
MN
210 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
211#if 0 // print different matrix pairs
212 if(blockSumErr){
213 printf("\n");
214 for(i=0; i<64; i++){
215 if((i&7)==0) printf("\n");
216 printf("%4d ", block_org[i]);
217 }
218 for(i=0; i<64; i++){
219 if((i&7)==0) printf("\n");
220 printf("%4d ", block[i] - block1[i]);
221 }
222 }
223#endif
224 }
225 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i]));
115329f1 226
86748dbc
MN
227#if 1 // dump systematic errors
228 for(i=0; i<64; i++){
bb270c08 229 if(i%8==0) printf("\n");
86748dbc 230 printf("%5d ", (int)sysErr[i]);
de6d9b64 231 }
86748dbc
MN
232 printf("\n");
233#endif
115329f1 234
86748dbc 235 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 236 is_idct ? "IDCT" : "DCT",
86748dbc
MN
237 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
238#if 1 //Speed test
de6d9b64 239 /* speed test */
86748dbc
MN
240 for(i=0;i<64;i++)
241 block1[i] = 0;
242 switch(test){
115329f1 243 case 0:
86748dbc
MN
244 for(i=0;i<64;i++)
245 block1[i] = (random() % 512) -256;
ad324c93 246 if (is_idct){
86748dbc 247 fdct(block1);
ad324c93
MN
248
249 for(i=0;i<64;i++)
250 block1[i]>>=3;
251 }
86748dbc
MN
252 break;
253 case 1:{
254 case 2:
255 block1[0] = (random() % 512) -256;
256 block1[1] = (random() % 512) -256;
257 block1[2] = (random() % 512) -256;
258 block1[3] = (random() % 512) -256;
259 }break;
260 }
de6d9b64 261
9e1586fc 262 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
263 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
264 for(i=0;i<64;i++)
9e1586fc 265 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 266 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
267 for(i=0;i<64;i++)
268 block[idct_simple_mmx_perm[i]] = block1[i];
269 } else {
270 for(i=0; i<64; i++)
271 block[i]= block1[i];
9e1586fc
FB
272 }
273
de6d9b64
FB
274 ti = gettime();
275 it1 = 0;
276 do {
277 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
278 for(i=0; i<64; i++)
279 block[i]= block1[i];
280// memcpy(block, block1, sizeof(DCTELEM) * 64);
281// dont memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
282 fdct_func(block);
283 }
284 it1 += NB_ITS_SPEED;
285 ti1 = gettime() - ti;
286 } while (ti1 < 1000000);
9e1586fc 287 emms();
de6d9b64 288
86748dbc 289 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 290 is_idct ? "IDCT" : "DCT",
de6d9b64 291 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 292#endif
de6d9b64
FB
293}
294
0c1a9eda
ZK
295static uint8_t img_dest[64] __attribute__ ((aligned (8)));
296static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
a46a3ce4 297
0c1a9eda 298void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
299{
300 static int init;
301 static double c8[8][8];
302 static double c4[4][4];
303 double block1[64], block2[64], block3[64];
304 double s, sum, v;
305 int i, j, k;
306
307 if (!init) {
308 init = 1;
309
310 for(i=0;i<8;i++) {
311 sum = 0;
312 for(j=0;j<8;j++) {
313 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
314 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
315 sum += c8[i][j] * c8[i][j];
316 }
317 }
115329f1 318
a46a3ce4
FB
319 for(i=0;i<4;i++) {
320 sum = 0;
321 for(j=0;j<4;j++) {
322 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
323 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
324 sum += c4[i][j] * c4[i][j];
325 }
326 }
327 }
328
329 /* butterfly */
652f0197 330 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
331 for(i=0;i<4;i++) {
332 for(j=0;j<8;j++) {
652f0197
FB
333 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
334 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
335 }
336 }
337
338 /* idct8 on lines */
339 for(i=0;i<8;i++) {
340 for(j=0;j<8;j++) {
341 sum = 0;
342 for(k=0;k<8;k++)
343 sum += c8[k][j] * block1[8*i+k];
344 block2[8*i+j] = sum;
345 }
346 }
347
348 /* idct4 */
349 for(i=0;i<8;i++) {
350 for(j=0;j<4;j++) {
351 /* top */
352 sum = 0;
353 for(k=0;k<4;k++)
354 sum += c4[k][j] * block2[8*(2*k)+i];
355 block3[8*(2*j)+i] = sum;
356
357 /* bottom */
358 sum = 0;
359 for(k=0;k<4;k++)
360 sum += c4[k][j] * block2[8*(2*k+1)+i];
361 block3[8*(2*j+1)+i] = sum;
362 }
363 }
364
365 /* clamp and store the result */
366 for(i=0;i<8;i++) {
367 for(j=0;j<8;j++) {
652f0197 368 v = block3[8*i+j];
a46a3ce4
FB
369 if (v < 0)
370 v = 0;
371 else if (v > 255)
372 v = 255;
373 dest[i * linesize + j] = (int)rint(v);
374 }
375 }
376}
377
115329f1 378void idct248_error(const char *name,
0c1a9eda 379 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
380{
381 int it, i, it1, ti, ti1, err_max, v;
382
383 srandom(0);
115329f1 384
a46a3ce4
FB
385 /* just one test to see if code is correct (precision is less
386 important here) */
387 err_max = 0;
388 for(it=0;it<NB_ITS;it++) {
115329f1 389
652f0197
FB
390 /* XXX: use forward transform to generate values */
391 for(i=0;i<64;i++)
392 block1[i] = (random() % 256) - 128;
393 block1[0] += 1024;
394
a46a3ce4
FB
395 for(i=0; i<64; i++)
396 block[i]= block1[i];
397 idct248_ref(img_dest1, 8, block);
115329f1 398
652f0197
FB
399 for(i=0; i<64; i++)
400 block[i]= block1[i];
401 idct248_put(img_dest, 8, block);
115329f1 402
652f0197
FB
403 for(i=0;i<64;i++) {
404 v = abs((int)img_dest[i] - (int)img_dest1[i]);
405 if (v == 255)
406 printf("%d %d\n", img_dest[i], img_dest1[i]);
407 if (v > err_max)
408 err_max = v;
409 }
a46a3ce4
FB
410#if 0
411 printf("ref=\n");
412 for(i=0;i<8;i++) {
413 int j;
414 for(j=0;j<8;j++) {
415 printf(" %3d", img_dest1[i*8+j]);
416 }
417 printf("\n");
418 }
115329f1 419
a46a3ce4
FB
420 printf("out=\n");
421 for(i=0;i<8;i++) {
422 int j;
423 for(j=0;j<8;j++) {
424 printf(" %3d", img_dest[i*8+j]);
425 }
426 printf("\n");
427 }
428#endif
a46a3ce4
FB
429 }
430 printf("%s %s: err_inf=%d\n",
431 1 ? "IDCT248" : "DCT248",
432 name, err_max);
433
434 ti = gettime();
435 it1 = 0;
436 do {
437 for(it=0;it<NB_ITS_SPEED;it++) {
438 for(i=0; i<64; i++)
439 block[i]= block1[i];
440// memcpy(block, block1, sizeof(DCTELEM) * 64);
441// dont memcpy especially not fastmemcpy because it does movntq !!!
442 idct248_put(img_dest, 8, block);
443 }
444 it1 += NB_ITS_SPEED;
445 ti1 = gettime() - ti;
446 } while (ti1 < 1000000);
447 emms();
448
449 printf("%s %s: %0.1f kdct/s\n",
450 1 ? "IDCT248" : "DCT248",
451 name, (double)it1 * 1000.0 / (double)ti1);
452}
453
9e1586fc
FB
454void help(void)
455{
86748dbc
MN
456 printf("dct-test [-i] [<test-number>]\n"
457 "test-number 0 -> test with random matrixes\n"
458 " 1 -> test with random sparse matrixes\n"
459 " 2 -> do 3. test from mpeg4 std\n"
a46a3ce4
FB
460 "-i test IDCT implementations\n"
461 "-4 test IDCT248 implementations\n");
9e1586fc
FB
462 exit(1);
463}
464
de6d9b64
FB
465int main(int argc, char **argv)
466{
a46a3ce4 467 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
468 int c,i;
469 int test=1;
9e1586fc 470
de6d9b64 471 init_fdct();
9e1586fc 472 idct_mmx_init();
de6d9b64 473
86748dbc
MN
474 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
475 for(i=0;i<MAX_NEG_CROP;i++) {
476 cropTbl[i] = 0;
477 cropTbl[i + MAX_NEG_CROP + 256] = 255;
478 }
115329f1 479
9e1586fc 480 for(;;) {
a46a3ce4 481 c = getopt(argc, argv, "ih4");
9e1586fc
FB
482 if (c == -1)
483 break;
484 switch(c) {
485 case 'i':
486 test_idct = 1;
487 break;
a46a3ce4
FB
488 case '4':
489 test_248_dct = 1;
490 break;
86748dbc 491 default :
9e1586fc
FB
492 case 'h':
493 help();
494 break;
495 }
496 }
115329f1 497
86748dbc 498 if(optind <argc) test= atoi(argv[optind]);
115329f1 499
9e1586fc
FB
500 printf("ffmpeg DCT/IDCT test\n");
501
a46a3ce4
FB
502 if (test_248_dct) {
503 idct248_error("SIMPLE-C", simple_idct248_put);
9e1586fc 504 } else {
a46a3ce4
FB
505 if (!test_idct) {
506 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
507 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
508 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
509 dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
94789b9e 510 dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
65e4c8c9 511 dct_error("FAAN", 0, ff_faandct, fdct, test);
a46a3ce4
FB
512 } else {
513 dct_error("REF-DBL", 1, idct, idct, test);
514 dct_error("INT", 1, j_rev_dct, idct, test);
515 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
516 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
517 dct_error("SIMPLE-C", 1, simple_idct, idct, test);
518 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
434df899
MN
519 dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
520 dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
a46a3ce4
FB
521 // dct_error("ODIVX-C", 1, odivx_idct_c, idct);
522 //printf(" test against odivx idct\n");
bb270c08 523 // dct_error("REF", 1, idct, odivx_idct_c);
a46a3ce4
FB
524 // dct_error("INT", 1, j_rev_dct, odivx_idct_c);
525 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
526 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
527 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c);
528 // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c);
529 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c);
530 }
9e1586fc 531 }
de6d9b64
FB
532 return 0;
533}