Remove unused variable.
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
1/*
2 * (c) 2001 Fabrice Bellard
3ac35bdb 3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
04d7f601 4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
04d7f601
DB
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
04d7f601 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
04d7f601
DB
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
04d7f601
DB
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
983e3246
MN
22/**
23 * @file dct-test.c
115329f1 24 * DCT test. (c) 2001 Fabrice Bellard.
983e3246
MN
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
33
34#include "dsputil.h"
35
86748dbc 36#include "simple_idct.h"
65e4c8c9 37#include "faandct.h"
9e1586fc 38
e366e679
FB
/* Larger of two values.  Function-like macro: an argument with side
 * effects may be evaluated twice. */
#if !defined(MAX)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
42
434df899
MN
43#undef printf
44
/**
 * Stand-in for the fast_memcpy symbol that simply forwards to memcpy().
 * (Presumably provided so that objects referencing fast_memcpy link into
 * this test program -- confirm against the build.)
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  a, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
46
9e1586fc 47/* reference fdct/idct */
de6d9b64 48extern void fdct(DCTELEM *block);
9e1586fc 49extern void idct(DCTELEM *block);
434df899
MN
50extern void ff_idct_xvid_mmx(DCTELEM *block);
51extern void ff_idct_xvid_mmx2(DCTELEM *block);
de6d9b64
FB
52extern void init_fdct();
53
9e1586fc
FB
54extern void ff_mmx_idct(DCTELEM *data);
55extern void ff_mmxext_idct(DCTELEM *data);
56
86748dbc
MN
57extern void odivx_idct_c (short *block);
58
3ac35bdb
MH
59// BFIN
60extern void ff_bfin_idct (DCTELEM *block) ;
61extern void ff_bfin_fdct (DCTELEM *block) ;
62
63// ALTIVEC
64extern void fdct_altivec (DCTELEM *block);
65//extern void idct_altivec (DCTELEM *block);?? no routine
66
67
68struct algo {
69 char *name;
70 enum { FDCT, IDCT } is_idct;
71 void (* func) (DCTELEM *block);
72 void (* ref) (DCTELEM *block);
73 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
74};
75
/* Format tag used for the FAAN table entry: NO_PERM when FAAN_POSTSCALE is
 * defined, SCALE_PERM otherwise. */
#ifdef FAAN_POSTSCALE
#define FAAN_SCALE NO_PERM
#else
#define FAAN_SCALE SCALE_PERM
#endif

/* Expands to a positional brace initializer for one struct algo entry. */
#define DCT_ERROR(name, is_idct, func, ref, form) { name, is_idct, func, ref, form }
83
84
85struct algo algos[] = {
86 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
b0b0d7e7 87 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
3ac35bdb
MH
88 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
89 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
90 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
91 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
92 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM),
93
ee3035f3 94#ifdef HAVE_MMX
3ac35bdb 95 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
94254fc0 96#ifdef HAVE_MMX2
3ac35bdb 97 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
94254fc0 98#endif
3ac35bdb 99
b9702de5 100#ifdef CONFIG_GPL
3ac35bdb
MH
101 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
102 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
b9702de5 103#endif
3ac35bdb
MH
104 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
105 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
106 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
107#endif
108
109#ifdef HAVE_ALTIVEC
110 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
111#endif
112
113#ifdef ARCH_BFIN
114 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
115 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
116#endif
117
118 { 0 }
119};
120
de6d9b64
FB
/* Right shift applied by dct_error() after multiplying a SCALE_PERM output
 * coefficient by its per-position scale factor. */
#define AANSCALE_BITS 12

/* Per-coefficient scale factors, one per position of the 8x8 block in
 * row-major order; precomputed values scaled up by 14 bits. */
static const unsigned short aanscales[8 * 8] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,   /* row 0 */
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,   /* row 1 */
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,   /* row 2 */
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,   /* row 3 */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,   /* row 4 */
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,   /* row 5 */
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,   /* row 6 */
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247    /* row 7 */
};
133
486497e0 134uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 135
/**
 * Read the wall clock through gettimeofday().
 *
 * @return seconds and microseconds of the current time folded into a single
 *         64-bit microsecond count
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
142
/* Number of random blocks pushed through each transform by the accuracy
 * loops. */
#define NB_ITS       (20 * 1000)
/* Number of transform calls made between two clock reads in the speed
 * loops. */
#define NB_ITS_SPEED (50 * 1000)
145
9e1586fc
FB
/* Input reordering used for MMX_PERM entries; computed at startup by
 * idct_mmx_init(). */
static short idct_mmx_perm[8 * 8];

/* Input reordering used for MMX_SIMPLE_PERM entries: coefficient i of the
 * natural-order block is stored at index idct_simple_mmx_perm[i]. */
static short idct_simple_mmx_perm[8 * 8] = {
    0x00, 0x08, 0x04, 0x09,   0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19,   0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29,   0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B,   0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B,   0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39,   0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B,   0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B,   0x33, 0x3E, 0x37, 0x3F,
};
158
9e1586fc
FB
159void idct_mmx_init(void)
160{
161 int i;
162
163 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
164 for (i = 0; i < 64; i++) {
bb270c08
DB
165 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
166// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
167 }
168}
169
170static DCTELEM block[64] __attribute__ ((aligned (8)));
171static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 172static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc
FB
173
174void dct_error(const char *name, int is_idct,
175 void (*fdct_func)(DCTELEM *block),
3ac35bdb 176 void (*fdct_ref)(DCTELEM *block), int form, int test)
de6d9b64
FB
177{
178 int it, i, scale;
de6d9b64 179 int err_inf, v;
0c1a9eda
ZK
180 int64_t err2, ti, ti1, it1;
181 int64_t sysErr[64], sysErrMax=0;
86748dbc 182 int maxout=0;
86748dbc 183 int blockSumErrMax=0, blockSumErr;
de6d9b64
FB
184
185 srandom(0);
186
187 err_inf = 0;
188 err2 = 0;
86748dbc 189 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 190 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
191 for(i=0;i<64;i++)
192 block1[i] = 0;
193 switch(test){
115329f1 194 case 0:
86748dbc
MN
195 for(i=0;i<64;i++)
196 block1[i] = (random() % 512) -256;
ad324c93 197 if (is_idct){
86748dbc 198 fdct(block1);
ad324c93
MN
199
200 for(i=0;i<64;i++)
201 block1[i]>>=3;
202 }
86748dbc
MN
203 break;
204 case 1:{
205 int num= (random()%10)+1;
206 for(i=0;i<num;i++)
207 block1[random()%64] = (random() % 512) -256;
208 }break;
209 case 2:
210 block1[0]= (random()%4096)-2048;
211 block1[63]= (block1[0]&1)^1;
212 break;
213 }
9e1586fc 214
86748dbc
MN
215#if 0 // simulate mismatch control
216{ int sum=0;
217 for(i=0;i<64;i++)
218 sum+=block1[i];
219
115329f1 220 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
221}
222#endif
223
224 for(i=0; i<64; i++)
225 block_org[i]= block1[i];
9e1586fc 226
3ac35bdb 227 if (form == MMX_PERM) {
86748dbc 228 for(i=0;i<64;i++)
9e1586fc 229 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 230 } else if (form == MMX_SIMPLE_PERM) {
86748dbc
MN
231 for(i=0;i<64;i++)
232 block[idct_simple_mmx_perm[i]] = block1[i];
233
bb270c08 234 } else {
86748dbc
MN
235 for(i=0; i<64; i++)
236 block[i]= block1[i];
9e1586fc 237 }
86748dbc
MN
238#if 0 // simulate mismatch control for tested IDCT but not the ref
239{ int sum=0;
240 for(i=0;i<64;i++)
241 sum+=block[i];
242
115329f1 243 if((sum&1)==0) block[63]^=1;
86748dbc
MN
244}
245#endif
9e1586fc 246
de6d9b64 247 fdct_func(block);
19ef2ba5 248 emms_c(); /* for ff_mmx_idct */
9e1586fc 249
3ac35bdb 250 if (form == SCALE_PERM) {
de6d9b64 251 for(i=0; i<64; i++) {
ad324c93 252 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
86748dbc
MN
253 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
254 }
255 }
256
9e1586fc 257 fdct_ref(block1);
de6d9b64 258
86748dbc 259 blockSumErr=0;
de6d9b64
FB
260 for(i=0;i<64;i++) {
261 v = abs(block[i] - block1[i]);
262 if (v > err_inf)
263 err_inf = v;
264 err2 += v * v;
bb270c08
DB
265 sysErr[i] += block[i] - block1[i];
266 blockSumErr += v;
267 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 268 }
86748dbc
MN
269 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
270#if 0 // print different matrix pairs
271 if(blockSumErr){
272 printf("\n");
273 for(i=0; i<64; i++){
274 if((i&7)==0) printf("\n");
275 printf("%4d ", block_org[i]);
276 }
277 for(i=0; i<64; i++){
278 if((i&7)==0) printf("\n");
279 printf("%4d ", block[i] - block1[i]);
280 }
281 }
282#endif
283 }
c26abfa5 284 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
115329f1 285
86748dbc
MN
286#if 1 // dump systematic errors
287 for(i=0; i<64; i++){
bb270c08 288 if(i%8==0) printf("\n");
86748dbc 289 printf("%5d ", (int)sysErr[i]);
de6d9b64 290 }
86748dbc
MN
291 printf("\n");
292#endif
115329f1 293
86748dbc 294 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 295 is_idct ? "IDCT" : "DCT",
86748dbc
MN
296 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
297#if 1 //Speed test
de6d9b64 298 /* speed test */
86748dbc
MN
299 for(i=0;i<64;i++)
300 block1[i] = 0;
301 switch(test){
115329f1 302 case 0:
86748dbc
MN
303 for(i=0;i<64;i++)
304 block1[i] = (random() % 512) -256;
ad324c93 305 if (is_idct){
86748dbc 306 fdct(block1);
ad324c93
MN
307
308 for(i=0;i<64;i++)
309 block1[i]>>=3;
310 }
86748dbc
MN
311 break;
312 case 1:{
313 case 2:
314 block1[0] = (random() % 512) -256;
315 block1[1] = (random() % 512) -256;
316 block1[2] = (random() % 512) -256;
317 block1[3] = (random() % 512) -256;
318 }break;
319 }
de6d9b64 320
3ac35bdb 321 if (form == MMX_PERM) {
86748dbc 322 for(i=0;i<64;i++)
9e1586fc 323 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 324 } else if(form == MMX_SIMPLE_PERM) {
86748dbc
MN
325 for(i=0;i<64;i++)
326 block[idct_simple_mmx_perm[i]] = block1[i];
327 } else {
328 for(i=0; i<64; i++)
329 block[i]= block1[i];
9e1586fc
FB
330 }
331
de6d9b64
FB
332 ti = gettime();
333 it1 = 0;
334 do {
335 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
336 for(i=0; i<64; i++)
337 block[i]= block1[i];
338// memcpy(block, block1, sizeof(DCTELEM) * 64);
339// dont memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
340 fdct_func(block);
341 }
342 it1 += NB_ITS_SPEED;
343 ti1 = gettime() - ti;
344 } while (ti1 < 1000000);
19ef2ba5 345 emms_c();
de6d9b64 346
86748dbc 347 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 348 is_idct ? "IDCT" : "DCT",
de6d9b64 349 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 350#endif
de6d9b64
FB
351}
352
0c1a9eda
ZK
/* 8x8 pixel outputs compared by idct248_error(), both 8-byte aligned. */

/* Written by the idct248 implementation under test. */
static uint8_t img_dest[8 * 8]  __attribute__ ((aligned (8)));
/* Written by the reference idct248_ref(). */
static uint8_t img_dest1[8 * 8] __attribute__ ((aligned (8)));
a46a3ce4 355
0c1a9eda 356void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
357{
358 static int init;
359 static double c8[8][8];
360 static double c4[4][4];
361 double block1[64], block2[64], block3[64];
362 double s, sum, v;
363 int i, j, k;
364
365 if (!init) {
366 init = 1;
367
368 for(i=0;i<8;i++) {
369 sum = 0;
370 for(j=0;j<8;j++) {
371 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
372 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
373 sum += c8[i][j] * c8[i][j];
374 }
375 }
115329f1 376
a46a3ce4
FB
377 for(i=0;i<4;i++) {
378 sum = 0;
379 for(j=0;j<4;j++) {
380 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
381 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
382 sum += c4[i][j] * c4[i][j];
383 }
384 }
385 }
386
387 /* butterfly */
652f0197 388 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
389 for(i=0;i<4;i++) {
390 for(j=0;j<8;j++) {
652f0197
FB
391 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
392 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
393 }
394 }
395
396 /* idct8 on lines */
397 for(i=0;i<8;i++) {
398 for(j=0;j<8;j++) {
399 sum = 0;
400 for(k=0;k<8;k++)
401 sum += c8[k][j] * block1[8*i+k];
402 block2[8*i+j] = sum;
403 }
404 }
405
406 /* idct4 */
407 for(i=0;i<8;i++) {
408 for(j=0;j<4;j++) {
409 /* top */
410 sum = 0;
411 for(k=0;k<4;k++)
412 sum += c4[k][j] * block2[8*(2*k)+i];
413 block3[8*(2*j)+i] = sum;
414
415 /* bottom */
416 sum = 0;
417 for(k=0;k<4;k++)
418 sum += c4[k][j] * block2[8*(2*k+1)+i];
419 block3[8*(2*j+1)+i] = sum;
420 }
421 }
422
423 /* clamp and store the result */
424 for(i=0;i<8;i++) {
425 for(j=0;j<8;j++) {
652f0197 426 v = block3[8*i+j];
a46a3ce4
FB
427 if (v < 0)
428 v = 0;
429 else if (v > 255)
430 v = 255;
431 dest[i * linesize + j] = (int)rint(v);
432 }
433 }
434}
435
115329f1 436void idct248_error(const char *name,
0c1a9eda 437 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
438{
439 int it, i, it1, ti, ti1, err_max, v;
440
441 srandom(0);
115329f1 442
a46a3ce4
FB
443 /* just one test to see if code is correct (precision is less
444 important here) */
445 err_max = 0;
446 for(it=0;it<NB_ITS;it++) {
115329f1 447
652f0197
FB
448 /* XXX: use forward transform to generate values */
449 for(i=0;i<64;i++)
450 block1[i] = (random() % 256) - 128;
451 block1[0] += 1024;
452
a46a3ce4
FB
453 for(i=0; i<64; i++)
454 block[i]= block1[i];
455 idct248_ref(img_dest1, 8, block);
115329f1 456
652f0197
FB
457 for(i=0; i<64; i++)
458 block[i]= block1[i];
459 idct248_put(img_dest, 8, block);
115329f1 460
652f0197
FB
461 for(i=0;i<64;i++) {
462 v = abs((int)img_dest[i] - (int)img_dest1[i]);
463 if (v == 255)
464 printf("%d %d\n", img_dest[i], img_dest1[i]);
465 if (v > err_max)
466 err_max = v;
467 }
a46a3ce4
FB
468#if 0
469 printf("ref=\n");
470 for(i=0;i<8;i++) {
471 int j;
472 for(j=0;j<8;j++) {
473 printf(" %3d", img_dest1[i*8+j]);
474 }
475 printf("\n");
476 }
115329f1 477
a46a3ce4
FB
478 printf("out=\n");
479 for(i=0;i<8;i++) {
480 int j;
481 for(j=0;j<8;j++) {
482 printf(" %3d", img_dest[i*8+j]);
483 }
484 printf("\n");
485 }
486#endif
a46a3ce4
FB
487 }
488 printf("%s %s: err_inf=%d\n",
489 1 ? "IDCT248" : "DCT248",
490 name, err_max);
491
492 ti = gettime();
493 it1 = 0;
494 do {
495 for(it=0;it<NB_ITS_SPEED;it++) {
496 for(i=0; i<64; i++)
497 block[i]= block1[i];
498// memcpy(block, block1, sizeof(DCTELEM) * 64);
499// dont memcpy especially not fastmemcpy because it does movntq !!!
500 idct248_put(img_dest, 8, block);
501 }
502 it1 += NB_ITS_SPEED;
503 ti1 = gettime() - ti;
504 } while (ti1 < 1000000);
19ef2ba5 505 emms_c();
a46a3ce4
FB
506
507 printf("%s %s: %0.1f kdct/s\n",
508 1 ? "IDCT248" : "DCT248",
509 name, (double)it1 * 1000.0 / (double)ti1);
510}
511
9e1586fc
FB
/* Print the command-line usage summary on standard output. */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
}
521
de6d9b64
FB
522int main(int argc, char **argv)
523{
a46a3ce4 524 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
525 int c,i;
526 int test=1;
9e1586fc 527
de6d9b64 528 init_fdct();
9e1586fc 529 idct_mmx_init();
de6d9b64 530
486497e0 531 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 532 for(i=0;i<MAX_NEG_CROP;i++) {
486497e0
MR
533 cropTbl[i] = 0;
534 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 535 }
115329f1 536
9e1586fc 537 for(;;) {
a46a3ce4 538 c = getopt(argc, argv, "ih4");
9e1586fc
FB
539 if (c == -1)
540 break;
541 switch(c) {
542 case 'i':
543 test_idct = 1;
544 break;
a46a3ce4
FB
545 case '4':
546 test_248_dct = 1;
547 break;
86748dbc 548 default :
9e1586fc
FB
549 case 'h':
550 help();
c6bdc908 551 return 0;
9e1586fc
FB
552 }
553 }
115329f1 554
86748dbc 555 if(optind <argc) test= atoi(argv[optind]);
115329f1 556
9e1586fc
FB
557 printf("ffmpeg DCT/IDCT test\n");
558
a46a3ce4
FB
559 if (test_248_dct) {
560 idct248_error("SIMPLE-C", simple_idct248_put);
9e1586fc 561 } else {
3ac35bdb
MH
562 for (i=0;algos[i].name;i++)
563 if (algos[i].is_idct == test_idct) {
564 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
a46a3ce4 565 }
9e1586fc 566 }
de6d9b64
FB
567 return 0;
568}