Fix linking when GPL code has been disabled.
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
1/*
2 * (c) 2001 Fabrice Bellard
3ac35bdb 3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
04d7f601 4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
04d7f601
DB
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
04d7f601 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
04d7f601
DB
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
04d7f601
DB
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
983e3246
MN
22/**
23 * @file dct-test.c
115329f1 24 * DCT test. (c) 2001 Fabrice Bellard.
983e3246
MN
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
33
34#include "dsputil.h"
35
86748dbc 36#include "simple_idct.h"
65e4c8c9 37#include "faandct.h"
9e1586fc 38
e366e679
FB
39#ifndef MAX
40#define MAX(a, b) (((a) > (b)) ? (a) : (b))
41#endif
42
434df899
MN
43#undef printf
44
/**
 * Plain memcpy() exported under the name fast_memcpy.
 *
 * Presumably provided so that objects referencing fast_memcpy can be linked
 * into this standalone test program -- TODO confirm against the build.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return a, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
46
9e1586fc 47/* reference fdct/idct */
de6d9b64 48extern void fdct(DCTELEM *block);
9e1586fc 49extern void idct(DCTELEM *block);
434df899
MN
50extern void ff_idct_xvid_mmx(DCTELEM *block);
51extern void ff_idct_xvid_mmx2(DCTELEM *block);
de6d9b64
FB
52extern void init_fdct();
53
9e1586fc
FB
54extern void ff_mmx_idct(DCTELEM *data);
55extern void ff_mmxext_idct(DCTELEM *data);
56
86748dbc
MN
57extern void odivx_idct_c (short *block);
58
3ac35bdb
MH
59// BFIN
60extern void ff_bfin_idct (DCTELEM *block) ;
61extern void ff_bfin_fdct (DCTELEM *block) ;
62
63// ALTIVEC
64extern void fdct_altivec (DCTELEM *block);
65//extern void idct_altivec (DCTELEM *block);?? no routine
66
67
68struct algo {
69 char *name;
70 enum { FDCT, IDCT } is_idct;
71 void (* func) (DCTELEM *block);
72 void (* ref) (DCTELEM *block);
73 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
74};
75
76#ifndef FAAN_POSTSCALE
77#define FAAN_SCALE SCALE_PERM
78#else
79#define FAAN_SCALE NO_PERM
80#endif
81
82#define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
83
84
85struct algo algos[] = {
86 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
b0b0d7e7 87 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
3ac35bdb
MH
88 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
89 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
90 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
91 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
92 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM),
93
ee3035f3 94#ifdef HAVE_MMX
3ac35bdb
MH
95 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
96 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
3ac35bdb 97
b9702de5 98#ifdef CONFIG_GPL
3ac35bdb
MH
99 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
100 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
b9702de5 101#endif
3ac35bdb
MH
102 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
103 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
104 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
105#endif
106
107#ifdef HAVE_ALTIVEC
108 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
109#endif
110
111#ifdef ARCH_BFIN
112 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
113 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
114#endif
115
116 { 0 }
117};
118
de6d9b64
FB
119#define AANSCALE_BITS 12
/*
 * Per-coefficient scale factors, one per position of the 8x8 block.
 * dct_error() divides 8*(1 << (AANSCALE_BITS + 11)) by these values to
 * rescale the output of implementations whose format is SCALE_PERM.
 * The first entry, 16384, equals 1 << 14.
 */
120static const unsigned short aanscales[64] = {
121 /* precomputed values scaled up by 14 bits */
122 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
123 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
124 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
125 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
126 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
127 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
128 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
129 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
130};
131
/*
 * Clamping lookup table, filled in by main(): the first MAX_NEG_CROP entries
 * are 0, the next 256 entries map i to i, the last MAX_NEG_CROP entries are 255.
 * It is not read anywhere in this file; presumably the transform code linked
 * into the test references it -- TODO confirm.
 */
486497e0 132uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 133
/**
 * Read the current time from gettimeofday().
 *
 * @return seconds * 1000000 + microseconds, as a 64-bit value
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen before multiplying so the seconds part cannot overflow */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
140
141#define NB_ITS 20000
142#define NB_ITS_SPEED 50000
143
9e1586fc
FB
/* Input reordering used for MMX_PERM implementations; filled by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Fixed input reordering used for MMX_SIMPLE_PERM implementations. */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/**
 * Fill idct_mmx_perm[].
 *
 * The mmx/mmxext idct uses a reordered input: for every index the upper
 * three bits are kept, bits 1-2 move down to bits 0-1 and bit 0 moves up
 * to bit 2.
 */
void idct_mmx_init(void)
{
    int pos;

    for (pos = 0; pos < 64; pos++) {
        int upper    = pos & 0x38;
        int low_pair = (pos & 6) >> 1;
        int low_bit  = (pos & 1) << 2;

        idct_mmx_perm[pos] = upper | low_pair | low_bit;
    }
}
167
/*
 * 8-byte aligned work buffers shared by dct_error() and idct248_error():
 *   block     - buffer handed to the implementation under test,
 *   block1    - generated input; dct_error() also passes it to the reference,
 *   block_org - copy of the generated input, only read by the disabled
 *               (#if 0) debug dump in dct_error().
 */
168static DCTELEM block[64] __attribute__ ((aligned (8)));
169static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 170static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc
FB
171
/**
 * Measure the accuracy and the speed of one DCT/IDCT implementation.
 *
 * Accuracy: NB_ITS generated blocks are run through fdct_func and fdct_ref.
 * The function tracks the largest absolute difference (err_inf), the sum of
 * squared differences (err2), the signed per-coefficient difference
 * (sysErr), the largest per-block sum of absolute differences and the
 * largest absolute output value, and prints them.
 * Speed: fdct_func is then called in batches of NB_ITS_SPEED until at least
 * 1000000 microseconds (as measured by gettime()) have passed.
 *
 * @param name      label printed with the results
 * @param is_idct   nonzero: results are labelled "IDCT" and, for test 0, the
 *                  random input is first run through fdct() and shifted
 *                  right by 3
 * @param fdct_func implementation under test
 * @param fdct_ref  reference implementation
 * @param form      one of the enum formattag values; selects the input
 *                  permutation (MMX_PERM, MMX_SIMPLE_PERM) or the output
 *                  rescaling (SCALE_PERM)
 * @param test      input pattern: 0 = all 64 values random, 1 = 1 to 10
 *                  random values written at random positions, 2 = random
 *                  block1[0] with block1[63] set to the inverse of its low bit
 */
172void dct_error(const char *name, int is_idct,
173 void (*fdct_func)(DCTELEM *block),
3ac35bdb 174 void (*fdct_ref)(DCTELEM *block), int form, int test)
de6d9b64
FB
175{
176 int it, i, scale;
de6d9b64 177 int err_inf, v;
0c1a9eda
ZK
178 int64_t err2, ti, ti1, it1;
179 int64_t sysErr[64], sysErrMax=0;
86748dbc 180 int maxout=0;
86748dbc 181 int blockSumErrMax=0, blockSumErr;
de6d9b64
FB
182
/* constant seed: every call starts from the same random() sequence */
183 srandom(0);
184
185 err_inf = 0;
186 err2 = 0;
86748dbc 187 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 188 for(it=0;it<NB_ITS;it++) {
/* build the input block in block1 according to the selected test pattern */
86748dbc
MN
189 for(i=0;i<64;i++)
190 block1[i] = 0;
191 switch(test){
115329f1 192 case 0:
86748dbc
MN
193 for(i=0;i<64;i++)
194 block1[i] = (random() % 512) -256;
ad324c93 195 if (is_idct){
86748dbc 196 fdct(block1);
ad324c93
MN
197
198 for(i=0;i<64;i++)
199 block1[i]>>=3;
200 }
86748dbc
MN
201 break;
202 case 1:{
203 int num= (random()%10)+1;
204 for(i=0;i<num;i++)
205 block1[random()%64] = (random() % 512) -256;
206 }break;
207 case 2:
208 block1[0]= (random()%4096)-2048;
209 block1[63]= (block1[0]&1)^1;
210 break;
211 }
9e1586fc 212
86748dbc
MN
213#if 0 // simulate mismatch control
214{ int sum=0;
215 for(i=0;i<64;i++)
216 sum+=block1[i];
217
115329f1 218 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
219}
220#endif
221
222 for(i=0; i<64; i++)
223 block_org[i]= block1[i];
9e1586fc 224
/* copy the input into block, reordered as the implementation's format requires */
3ac35bdb 225 if (form == MMX_PERM) {
86748dbc 226 for(i=0;i<64;i++)
9e1586fc 227 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 228 } else if (form == MMX_SIMPLE_PERM) {
86748dbc
MN
229 for(i=0;i<64;i++)
230 block[idct_simple_mmx_perm[i]] = block1[i];
231
bb270c08 232 } else {
86748dbc
MN
233 for(i=0; i<64; i++)
234 block[i]= block1[i];
9e1586fc 235 }
86748dbc
MN
236#if 0 // simulate mismatch control for tested IDCT but not the ref
237{ int sum=0;
238 for(i=0;i<64;i++)
239 sum+=block[i];
240
115329f1 241 if((sum&1)==0) block[63]^=1;
86748dbc
MN
242}
243#endif
9e1586fc 244
de6d9b64 245 fdct_func(block);
19ef2ba5 246 emms_c(); /* for ff_mmx_idct */
9e1586fc 247
/* SCALE_PERM output is rescaled per coefficient with aanscales[] before comparing */
3ac35bdb 248 if (form == SCALE_PERM) {
de6d9b64 249 for(i=0; i<64; i++) {
ad324c93 250 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
86748dbc
MN
251 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
252 }
253 }
254
/* the reference transforms block1 in place; block holds the tested result */
9e1586fc 255 fdct_ref(block1);
de6d9b64 256
86748dbc 257 blockSumErr=0;
de6d9b64
FB
258 for(i=0;i<64;i++) {
259 v = abs(block[i] - block1[i]);
260 if (v > err_inf)
261 err_inf = v;
262 err2 += v * v;
bb270c08
DB
263 sysErr[i] += block[i] - block1[i];
264 blockSumErr += v;
265 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 266 }
86748dbc
MN
267 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
268#if 0 // print different matrix pairs
269 if(blockSumErr){
270 printf("\n");
271 for(i=0; i<64; i++){
272 if((i&7)==0) printf("\n");
273 printf("%4d ", block_org[i]);
274 }
275 for(i=0; i<64; i++){
276 if((i&7)==0) printf("\n");
277 printf("%4d ", block[i] - block1[i]);
278 }
279 }
280#endif
281 }
/* largest magnitude among the accumulated signed per-coefficient errors */
c26abfa5 282 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
115329f1 283
86748dbc
MN
284#if 1 // dump systematic errors
285 for(i=0; i<64; i++){
bb270c08 286 if(i%8==0) printf("\n");
86748dbc 287 printf("%5d ", (int)sysErr[i]);
de6d9b64 288 }
86748dbc
MN
289 printf("\n");
290#endif
115329f1 291
86748dbc 292 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 293 is_idct ? "IDCT" : "DCT",
86748dbc
MN
294 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
295#if 1 //Speed test
de6d9b64 296 /* speed test */
/* one input block is generated once and reused for every timed call;
   tests 1 and 2 share the same input here: four random leading values */
86748dbc
MN
297 for(i=0;i<64;i++)
298 block1[i] = 0;
299 switch(test){
115329f1 300 case 0:
86748dbc
MN
301 for(i=0;i<64;i++)
302 block1[i] = (random() % 512) -256;
ad324c93 303 if (is_idct){
86748dbc 304 fdct(block1);
ad324c93
MN
305
306 for(i=0;i<64;i++)
307 block1[i]>>=3;
308 }
86748dbc
MN
309 break;
310 case 1:{
311 case 2:
312 block1[0] = (random() % 512) -256;
313 block1[1] = (random() % 512) -256;
314 block1[2] = (random() % 512) -256;
315 block1[3] = (random() % 512) -256;
316 }break;
317 }
de6d9b64 318
3ac35bdb 319 if (form == MMX_PERM) {
86748dbc 320 for(i=0;i<64;i++)
9e1586fc 321 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 322 } else if(form == MMX_SIMPLE_PERM) {
86748dbc
MN
323 for(i=0;i<64;i++)
324 block[idct_simple_mmx_perm[i]] = block1[i];
325 } else {
326 for(i=0; i<64; i++)
327 block[i]= block1[i];
9e1586fc
FB
328 }
329
/* run batches of NB_ITS_SPEED calls until at least 1000000 us have elapsed */
de6d9b64
FB
330 ti = gettime();
331 it1 = 0;
332 do {
333 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
334 for(i=0; i<64; i++)
335 block[i]= block1[i];
336// memcpy(block, block1, sizeof(DCTELEM) * 64);
337// dont memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
338 fdct_func(block);
339 }
340 it1 += NB_ITS_SPEED;
341 ti1 = gettime() - ti;
342 } while (ti1 < 1000000);
19ef2ba5 343 emms_c();
de6d9b64 344
/* it1 calls in ti1 microseconds, printed as thousands of calls per second */
86748dbc 345 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 346 is_idct ? "IDCT" : "DCT",
de6d9b64 347 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 348#endif
de6d9b64
FB
349}
350
0c1a9eda
ZK
/*
 * 8x8 output pixel buffers used by idct248_error(): img_dest receives the
 * output of the implementation under test, img_dest1 the output of
 * idct248_ref().
 */
351static uint8_t img_dest[64] __attribute__ ((aligned (8)));
352static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
a46a3ce4 353
0c1a9eda 354void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
355{
356 static int init;
357 static double c8[8][8];
358 static double c4[4][4];
359 double block1[64], block2[64], block3[64];
360 double s, sum, v;
361 int i, j, k;
362
363 if (!init) {
364 init = 1;
365
366 for(i=0;i<8;i++) {
367 sum = 0;
368 for(j=0;j<8;j++) {
369 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
370 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
371 sum += c8[i][j] * c8[i][j];
372 }
373 }
115329f1 374
a46a3ce4
FB
375 for(i=0;i<4;i++) {
376 sum = 0;
377 for(j=0;j<4;j++) {
378 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
379 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
380 sum += c4[i][j] * c4[i][j];
381 }
382 }
383 }
384
385 /* butterfly */
652f0197 386 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
387 for(i=0;i<4;i++) {
388 for(j=0;j<8;j++) {
652f0197
FB
389 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
390 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
391 }
392 }
393
394 /* idct8 on lines */
395 for(i=0;i<8;i++) {
396 for(j=0;j<8;j++) {
397 sum = 0;
398 for(k=0;k<8;k++)
399 sum += c8[k][j] * block1[8*i+k];
400 block2[8*i+j] = sum;
401 }
402 }
403
404 /* idct4 */
405 for(i=0;i<8;i++) {
406 for(j=0;j<4;j++) {
407 /* top */
408 sum = 0;
409 for(k=0;k<4;k++)
410 sum += c4[k][j] * block2[8*(2*k)+i];
411 block3[8*(2*j)+i] = sum;
412
413 /* bottom */
414 sum = 0;
415 for(k=0;k<4;k++)
416 sum += c4[k][j] * block2[8*(2*k+1)+i];
417 block3[8*(2*j+1)+i] = sum;
418 }
419 }
420
421 /* clamp and store the result */
422 for(i=0;i<8;i++) {
423 for(j=0;j<8;j++) {
652f0197 424 v = block3[8*i+j];
a46a3ce4
FB
425 if (v < 0)
426 v = 0;
427 else if (v > 255)
428 v = 255;
429 dest[i * linesize + j] = (int)rint(v);
430 }
431 }
432}
433
115329f1 434void idct248_error(const char *name,
0c1a9eda 435 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
436{
437 int it, i, it1, ti, ti1, err_max, v;
438
439 srandom(0);
115329f1 440
a46a3ce4
FB
441 /* just one test to see if code is correct (precision is less
442 important here) */
443 err_max = 0;
444 for(it=0;it<NB_ITS;it++) {
115329f1 445
652f0197
FB
446 /* XXX: use forward transform to generate values */
447 for(i=0;i<64;i++)
448 block1[i] = (random() % 256) - 128;
449 block1[0] += 1024;
450
a46a3ce4
FB
451 for(i=0; i<64; i++)
452 block[i]= block1[i];
453 idct248_ref(img_dest1, 8, block);
115329f1 454
652f0197
FB
455 for(i=0; i<64; i++)
456 block[i]= block1[i];
457 idct248_put(img_dest, 8, block);
115329f1 458
652f0197
FB
459 for(i=0;i<64;i++) {
460 v = abs((int)img_dest[i] - (int)img_dest1[i]);
461 if (v == 255)
462 printf("%d %d\n", img_dest[i], img_dest1[i]);
463 if (v > err_max)
464 err_max = v;
465 }
a46a3ce4
FB
466#if 0
467 printf("ref=\n");
468 for(i=0;i<8;i++) {
469 int j;
470 for(j=0;j<8;j++) {
471 printf(" %3d", img_dest1[i*8+j]);
472 }
473 printf("\n");
474 }
115329f1 475
a46a3ce4
FB
476 printf("out=\n");
477 for(i=0;i<8;i++) {
478 int j;
479 for(j=0;j<8;j++) {
480 printf(" %3d", img_dest[i*8+j]);
481 }
482 printf("\n");
483 }
484#endif
a46a3ce4
FB
485 }
486 printf("%s %s: err_inf=%d\n",
487 1 ? "IDCT248" : "DCT248",
488 name, err_max);
489
490 ti = gettime();
491 it1 = 0;
492 do {
493 for(it=0;it<NB_ITS_SPEED;it++) {
494 for(i=0; i<64; i++)
495 block[i]= block1[i];
496// memcpy(block, block1, sizeof(DCTELEM) * 64);
497// dont memcpy especially not fastmemcpy because it does movntq !!!
498 idct248_put(img_dest, 8, block);
499 }
500 it1 += NB_ITS_SPEED;
501 ti1 = gettime() - ti;
502 } while (ti1 < 1000000);
19ef2ba5 503 emms_c();
a46a3ce4
FB
504
505 printf("%s %s: %0.1f kdct/s\n",
506 1 ? "IDCT248" : "DCT248",
507 name, (double)it1 * 1000.0 / (double)ti1);
508}
509
9e1586fc
FB
/**
 * Print the command line usage to standard output.
 */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
}
519
de6d9b64
FB
520int main(int argc, char **argv)
521{
a46a3ce4 522 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
523 int c,i;
524 int test=1;
9e1586fc 525
de6d9b64 526 init_fdct();
9e1586fc 527 idct_mmx_init();
de6d9b64 528
486497e0 529 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 530 for(i=0;i<MAX_NEG_CROP;i++) {
486497e0
MR
531 cropTbl[i] = 0;
532 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 533 }
115329f1 534
9e1586fc 535 for(;;) {
a46a3ce4 536 c = getopt(argc, argv, "ih4");
9e1586fc
FB
537 if (c == -1)
538 break;
539 switch(c) {
540 case 'i':
541 test_idct = 1;
542 break;
a46a3ce4
FB
543 case '4':
544 test_248_dct = 1;
545 break;
86748dbc 546 default :
9e1586fc
FB
547 case 'h':
548 help();
c6bdc908 549 return 0;
9e1586fc
FB
550 }
551 }
115329f1 552
86748dbc 553 if(optind <argc) test= atoi(argv[optind]);
115329f1 554
9e1586fc
FB
555 printf("ffmpeg DCT/IDCT test\n");
556
a46a3ce4
FB
557 if (test_248_dct) {
558 idct248_error("SIMPLE-C", simple_idct248_put);
9e1586fc 559 } else {
3ac35bdb
MH
560 for (i=0;algos[i].name;i++)
561 if (algos[i].is_idct == test_idct) {
562 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
a46a3ce4 563 }
9e1586fc 564 }
de6d9b64
FB
565 return 0;
566}