Fix compilation when MMX is disabled.
[libav.git] / libavcodec / dct-test.c
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file dct-test.c
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33
34 #include "dsputil.h"
35
36 #include "simple_idct.h"
37 #include "faandct.h"
38
#ifndef MAX
/* Larger of the two arguments.  Each argument may be evaluated twice, so do
   not pass expressions with side effects. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif

/* Remove any macro named printf so the calls in this file use the stdio
   function (presumably a project header redefines it -- confirm). */
#undef printf

/**
 * memcpy() wrapper exported under the name fast_memcpy.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  a, exactly as memcpy() does
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
46
47 /* reference fdct/idct */
48 extern void fdct(DCTELEM *block);
49 extern void idct(DCTELEM *block);
50 extern void ff_idct_xvid_mmx(DCTELEM *block);
51 extern void ff_idct_xvid_mmx2(DCTELEM *block);
52 extern void init_fdct();
53
54 extern void ff_mmx_idct(DCTELEM *data);
55 extern void ff_mmxext_idct(DCTELEM *data);
56
57 extern void odivx_idct_c (short *block);
58
59 // BFIN
60 extern void ff_bfin_idct (DCTELEM *block) ;
61 extern void ff_bfin_fdct (DCTELEM *block) ;
62
63 // ALTIVEC
64 extern void fdct_altivec (DCTELEM *block);
65 //extern void idct_altivec (DCTELEM *block);?? no routine
66
67
68 struct algo {
69 char *name;
70 enum { FDCT, IDCT } is_idct;
71 void (* func) (DCTELEM *block);
72 void (* ref) (DCTELEM *block);
73 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
74 };
75
76 #ifndef FAAN_POSTSCALE
77 #define FAAN_SCALE SCALE_PERM
78 #else
79 #define FAAN_SCALE NO_PERM
80 #endif
81
82 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
83
84
85 struct algo algos[] = {
86 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
87 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
88 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
89 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
90 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
91 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
92 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM),
93
94 #ifdef HAVE_MMX
95 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
96 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
97
98 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
99 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
100 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
101 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
102 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
103 #endif
104
105 #ifdef HAVE_ALTIVEC
106 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
107 #endif
108
109 #ifdef ARCH_BFIN
110 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
111 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
112 #endif
113
114 { 0 }
115 };
116
/* Right shift applied by dct_error() after multiplying a SCALE_PERM output
   coefficient by its scale factor. */
#define AANSCALE_BITS 12

/* Per-coefficient scale factors in row-major 8x8 order, precomputed and
   scaled up by 14 bits (so 16384 stands for 1.0). */
static const unsigned short aanscales[64] = {
    /* row 0 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 1 */ 22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    /* row 2 */ 21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    /* row 3 */ 19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    /* row 4 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 5 */ 12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    /* row 6 */  8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    /* row 7 */  4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
129
130 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
131
/**
 * Read the current time of day.
 *
 * @return the value reported by gettimeofday(), expressed in microseconds
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen before scaling so the seconds part cannot overflow */
    usec  = now.tv_sec;
    usec *= 1000000;
    usec += now.tv_usec;
    return usec;
}
138
/* Number of blocks used for the accuracy measurement. */
#define NB_ITS 20000
/* Number of transform calls per timing batch in the speed test. */
#define NB_ITS_SPEED 50000

/* Input order for MMX_PERM transforms; computed by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input order for MMX_SIMPLE_PERM transforms (fixed table). */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09,   0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19,   0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29,   0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B,   0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B,   0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39,   0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B,   0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B,   0x33, 0x3E, 0x37, 0x3F,
};

/**
 * Fill idct_mmx_perm.
 *
 * The mmx/mmxext idct takes its input in a reordered layout: the row bits of
 * an index are kept, while the three column bits are rotated right by one.
 */
void idct_mmx_init(void)
{
    int pos;

    for (pos = 0; pos < 64; pos++) {
        int row_bits = pos & 0x38;   /* bits 3..5: row, left untouched */
        int col      = pos & 7;      /* bits 0..2: column inside the row */

        idct_mmx_perm[pos] = row_bits | (col >> 1) | ((col & 1) << 2);
    }
}
165
166 static DCTELEM block[64] __attribute__ ((aligned (8)));
167 static DCTELEM block1[64] __attribute__ ((aligned (8)));
168 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
169
170 void dct_error(const char *name, int is_idct,
171 void (*fdct_func)(DCTELEM *block),
172 void (*fdct_ref)(DCTELEM *block), int form, int test)
173 {
174 int it, i, scale;
175 int err_inf, v;
176 int64_t err2, ti, ti1, it1;
177 int64_t sysErr[64], sysErrMax=0;
178 int maxout=0;
179 int blockSumErrMax=0, blockSumErr;
180
181 srandom(0);
182
183 err_inf = 0;
184 err2 = 0;
185 for(i=0; i<64; i++) sysErr[i]=0;
186 for(it=0;it<NB_ITS;it++) {
187 for(i=0;i<64;i++)
188 block1[i] = 0;
189 switch(test){
190 case 0:
191 for(i=0;i<64;i++)
192 block1[i] = (random() % 512) -256;
193 if (is_idct){
194 fdct(block1);
195
196 for(i=0;i<64;i++)
197 block1[i]>>=3;
198 }
199 break;
200 case 1:{
201 int num= (random()%10)+1;
202 for(i=0;i<num;i++)
203 block1[random()%64] = (random() % 512) -256;
204 }break;
205 case 2:
206 block1[0]= (random()%4096)-2048;
207 block1[63]= (block1[0]&1)^1;
208 break;
209 }
210
211 #if 0 // simulate mismatch control
212 { int sum=0;
213 for(i=0;i<64;i++)
214 sum+=block1[i];
215
216 if((sum&1)==0) block1[63]^=1;
217 }
218 #endif
219
220 for(i=0; i<64; i++)
221 block_org[i]= block1[i];
222
223 if (form == MMX_PERM) {
224 for(i=0;i<64;i++)
225 block[idct_mmx_perm[i]] = block1[i];
226 } else if (form == MMX_SIMPLE_PERM) {
227 for(i=0;i<64;i++)
228 block[idct_simple_mmx_perm[i]] = block1[i];
229
230 } else {
231 for(i=0; i<64; i++)
232 block[i]= block1[i];
233 }
234 #if 0 // simulate mismatch control for tested IDCT but not the ref
235 { int sum=0;
236 for(i=0;i<64;i++)
237 sum+=block[i];
238
239 if((sum&1)==0) block[63]^=1;
240 }
241 #endif
242
243 fdct_func(block);
244 emms_c(); /* for ff_mmx_idct */
245
246 if (form == SCALE_PERM) {
247 for(i=0; i<64; i++) {
248 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
249 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
250 }
251 }
252
253 fdct_ref(block1);
254
255 blockSumErr=0;
256 for(i=0;i<64;i++) {
257 v = abs(block[i] - block1[i]);
258 if (v > err_inf)
259 err_inf = v;
260 err2 += v * v;
261 sysErr[i] += block[i] - block1[i];
262 blockSumErr += v;
263 if( abs(block[i])>maxout) maxout=abs(block[i]);
264 }
265 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
266 #if 0 // print different matrix pairs
267 if(blockSumErr){
268 printf("\n");
269 for(i=0; i<64; i++){
270 if((i&7)==0) printf("\n");
271 printf("%4d ", block_org[i]);
272 }
273 for(i=0; i<64; i++){
274 if((i&7)==0) printf("\n");
275 printf("%4d ", block[i] - block1[i]);
276 }
277 }
278 #endif
279 }
280 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
281
282 #if 1 // dump systematic errors
283 for(i=0; i<64; i++){
284 if(i%8==0) printf("\n");
285 printf("%5d ", (int)sysErr[i]);
286 }
287 printf("\n");
288 #endif
289
290 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
291 is_idct ? "IDCT" : "DCT",
292 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
293 #if 1 //Speed test
294 /* speed test */
295 for(i=0;i<64;i++)
296 block1[i] = 0;
297 switch(test){
298 case 0:
299 for(i=0;i<64;i++)
300 block1[i] = (random() % 512) -256;
301 if (is_idct){
302 fdct(block1);
303
304 for(i=0;i<64;i++)
305 block1[i]>>=3;
306 }
307 break;
308 case 1:{
309 case 2:
310 block1[0] = (random() % 512) -256;
311 block1[1] = (random() % 512) -256;
312 block1[2] = (random() % 512) -256;
313 block1[3] = (random() % 512) -256;
314 }break;
315 }
316
317 if (form == MMX_PERM) {
318 for(i=0;i<64;i++)
319 block[idct_mmx_perm[i]] = block1[i];
320 } else if(form == MMX_SIMPLE_PERM) {
321 for(i=0;i<64;i++)
322 block[idct_simple_mmx_perm[i]] = block1[i];
323 } else {
324 for(i=0; i<64; i++)
325 block[i]= block1[i];
326 }
327
328 ti = gettime();
329 it1 = 0;
330 do {
331 for(it=0;it<NB_ITS_SPEED;it++) {
332 for(i=0; i<64; i++)
333 block[i]= block1[i];
334 // memcpy(block, block1, sizeof(DCTELEM) * 64);
335 // dont memcpy especially not fastmemcpy because it does movntq !!!
336 fdct_func(block);
337 }
338 it1 += NB_ITS_SPEED;
339 ti1 = gettime() - ti;
340 } while (ti1 < 1000000);
341 emms_c();
342
343 printf("%s %s: %0.1f kdct/s\n",
344 is_idct ? "IDCT" : "DCT",
345 name, (double)it1 * 1000.0 / (double)ti1);
346 #endif
347 }
348
349 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
350 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
351
352 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
353 {
354 static int init;
355 static double c8[8][8];
356 static double c4[4][4];
357 double block1[64], block2[64], block3[64];
358 double s, sum, v;
359 int i, j, k;
360
361 if (!init) {
362 init = 1;
363
364 for(i=0;i<8;i++) {
365 sum = 0;
366 for(j=0;j<8;j++) {
367 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
368 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
369 sum += c8[i][j] * c8[i][j];
370 }
371 }
372
373 for(i=0;i<4;i++) {
374 sum = 0;
375 for(j=0;j<4;j++) {
376 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
377 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
378 sum += c4[i][j] * c4[i][j];
379 }
380 }
381 }
382
383 /* butterfly */
384 s = 0.5 * sqrt(2.0);
385 for(i=0;i<4;i++) {
386 for(j=0;j<8;j++) {
387 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
388 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
389 }
390 }
391
392 /* idct8 on lines */
393 for(i=0;i<8;i++) {
394 for(j=0;j<8;j++) {
395 sum = 0;
396 for(k=0;k<8;k++)
397 sum += c8[k][j] * block1[8*i+k];
398 block2[8*i+j] = sum;
399 }
400 }
401
402 /* idct4 */
403 for(i=0;i<8;i++) {
404 for(j=0;j<4;j++) {
405 /* top */
406 sum = 0;
407 for(k=0;k<4;k++)
408 sum += c4[k][j] * block2[8*(2*k)+i];
409 block3[8*(2*j)+i] = sum;
410
411 /* bottom */
412 sum = 0;
413 for(k=0;k<4;k++)
414 sum += c4[k][j] * block2[8*(2*k+1)+i];
415 block3[8*(2*j+1)+i] = sum;
416 }
417 }
418
419 /* clamp and store the result */
420 for(i=0;i<8;i++) {
421 for(j=0;j<8;j++) {
422 v = block3[8*i+j];
423 if (v < 0)
424 v = 0;
425 else if (v > 255)
426 v = 255;
427 dest[i * linesize + j] = (int)rint(v);
428 }
429 }
430 }
431
432 void idct248_error(const char *name,
433 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
434 {
435 int it, i, it1, ti, ti1, err_max, v;
436
437 srandom(0);
438
439 /* just one test to see if code is correct (precision is less
440 important here) */
441 err_max = 0;
442 for(it=0;it<NB_ITS;it++) {
443
444 /* XXX: use forward transform to generate values */
445 for(i=0;i<64;i++)
446 block1[i] = (random() % 256) - 128;
447 block1[0] += 1024;
448
449 for(i=0; i<64; i++)
450 block[i]= block1[i];
451 idct248_ref(img_dest1, 8, block);
452
453 for(i=0; i<64; i++)
454 block[i]= block1[i];
455 idct248_put(img_dest, 8, block);
456
457 for(i=0;i<64;i++) {
458 v = abs((int)img_dest[i] - (int)img_dest1[i]);
459 if (v == 255)
460 printf("%d %d\n", img_dest[i], img_dest1[i]);
461 if (v > err_max)
462 err_max = v;
463 }
464 #if 0
465 printf("ref=\n");
466 for(i=0;i<8;i++) {
467 int j;
468 for(j=0;j<8;j++) {
469 printf(" %3d", img_dest1[i*8+j]);
470 }
471 printf("\n");
472 }
473
474 printf("out=\n");
475 for(i=0;i<8;i++) {
476 int j;
477 for(j=0;j<8;j++) {
478 printf(" %3d", img_dest[i*8+j]);
479 }
480 printf("\n");
481 }
482 #endif
483 }
484 printf("%s %s: err_inf=%d\n",
485 1 ? "IDCT248" : "DCT248",
486 name, err_max);
487
488 ti = gettime();
489 it1 = 0;
490 do {
491 for(it=0;it<NB_ITS_SPEED;it++) {
492 for(i=0; i<64; i++)
493 block[i]= block1[i];
494 // memcpy(block, block1, sizeof(DCTELEM) * 64);
495 // dont memcpy especially not fastmemcpy because it does movntq !!!
496 idct248_put(img_dest, 8, block);
497 }
498 it1 += NB_ITS_SPEED;
499 ti1 = gettime() - ti;
500 } while (ti1 < 1000000);
501 emms_c();
502
503 printf("%s %s: %0.1f kdct/s\n",
504 1 ? "IDCT248" : "DCT248",
505 name, (double)it1 * 1000.0 / (double)ti1);
506 }
507
/* Print the command-line summary on standard output. */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    fputs(usage, stdout);
}
517
518 int main(int argc, char **argv)
519 {
520 int test_idct = 0, test_248_dct = 0;
521 int c,i;
522 int test=1;
523
524 init_fdct();
525 idct_mmx_init();
526
527 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
528 for(i=0;i<MAX_NEG_CROP;i++) {
529 cropTbl[i] = 0;
530 cropTbl[i + MAX_NEG_CROP + 256] = 255;
531 }
532
533 for(;;) {
534 c = getopt(argc, argv, "ih4");
535 if (c == -1)
536 break;
537 switch(c) {
538 case 'i':
539 test_idct = 1;
540 break;
541 case '4':
542 test_248_dct = 1;
543 break;
544 default :
545 case 'h':
546 help();
547 return 0;
548 }
549 }
550
551 if(optind <argc) test= atoi(argv[optind]);
552
553 printf("ffmpeg DCT/IDCT test\n");
554
555 if (test_248_dct) {
556 idct248_error("SIMPLE-C", simple_idct248_put);
557 } else {
558 for (i=0;algos[i].name;i++)
559 if (algos[i].is_idct == test_idct) {
560 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
561 }
562 }
563 return 0;
564 }