Move the video size and rate abbreviations system from libavformat to libavcodec
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
1/*
2 * (c) 2001 Fabrice Bellard
3ac35bdb 3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
04d7f601 4 *
b78e7197
DB
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
04d7f601
DB
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
04d7f601 11 *
b78e7197 12 * FFmpeg is distributed in the hope that it will be useful,
04d7f601
DB
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
b78e7197 18 * License along with FFmpeg; if not, write to the Free Software
04d7f601
DB
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
983e3246
MN
22/**
23 * @file dct-test.c
115329f1 24 * DCT test. (c) 2001 Fabrice Bellard.
983e3246
MN
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
de6d9b64
FB
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <sys/time.h>
32#include <unistd.h>
12807c8d 33#include <math.h>
de6d9b64
FB
34
35#include "dsputil.h"
36
86748dbc 37#include "simple_idct.h"
65e4c8c9 38#include "faandct.h"
9e1586fc 39
e366e679
FB
40#ifndef MAX
41#define MAX(a, b) (((a) > (b)) ? (a) : (b))
42#endif
43
434df899
MN
44#undef printf
45
46void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
47
9e1586fc 48/* reference fdct/idct */
de6d9b64 49extern void fdct(DCTELEM *block);
9e1586fc 50extern void idct(DCTELEM *block);
434df899
MN
51extern void ff_idct_xvid_mmx(DCTELEM *block);
52extern void ff_idct_xvid_mmx2(DCTELEM *block);
de6d9b64
FB
53extern void init_fdct();
54
9e1586fc
FB
55extern void ff_mmx_idct(DCTELEM *data);
56extern void ff_mmxext_idct(DCTELEM *data);
57
86748dbc
MN
58extern void odivx_idct_c (short *block);
59
3ac35bdb
MH
60// BFIN
61extern void ff_bfin_idct (DCTELEM *block) ;
62extern void ff_bfin_fdct (DCTELEM *block) ;
63
64// ALTIVEC
65extern void fdct_altivec (DCTELEM *block);
66//extern void idct_altivec (DCTELEM *block);?? no routine
67
68
69struct algo {
70 char *name;
71 enum { FDCT, IDCT } is_idct;
72 void (* func) (DCTELEM *block);
73 void (* ref) (DCTELEM *block);
74 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
75};
76
77#ifndef FAAN_POSTSCALE
78#define FAAN_SCALE SCALE_PERM
79#else
80#define FAAN_SCALE NO_PERM
81#endif
82
83#define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
84
85
86struct algo algos[] = {
87 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
b0b0d7e7 88 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
3ac35bdb
MH
89 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
90 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
91 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
92 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
93 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM),
94
ee3035f3 95#ifdef HAVE_MMX
3ac35bdb 96 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
94254fc0 97#ifdef HAVE_MMX2
3ac35bdb 98 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
94254fc0 99#endif
3ac35bdb 100
b9702de5 101#ifdef CONFIG_GPL
3ac35bdb
MH
102 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
103 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
b9702de5 104#endif
3ac35bdb
MH
105 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
106 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
107 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
108#endif
109
110#ifdef HAVE_ALTIVEC
111 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
112#endif
113
114#ifdef ARCH_BFIN
115 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
116 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
117#endif
118
119 { 0 }
120};
121
de6d9b64
FB
122#define AANSCALE_BITS 12
123static const unsigned short aanscales[64] = {
124 /* precomputed values scaled up by 14 bits */
125 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
126 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
127 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
128 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
129 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
130 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
131 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
132 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
133};
134
486497e0 135uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 136
0c1a9eda 137int64_t gettime(void)
de6d9b64
FB
138{
139 struct timeval tv;
140 gettimeofday(&tv,NULL);
0c1a9eda 141 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
de6d9b64
FB
142}
143
144#define NB_ITS 20000
145#define NB_ITS_SPEED 50000
146
9e1586fc
FB
147static short idct_mmx_perm[64];
148
86748dbc 149static short idct_simple_mmx_perm[64]={
bb270c08
DB
150 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
151 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
152 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
153 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
154 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
155 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
156 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
157 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
86748dbc
MN
158};
159
9e1586fc
FB
160void idct_mmx_init(void)
161{
162 int i;
163
164 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
165 for (i = 0; i < 64; i++) {
bb270c08
DB
166 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
167// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
168 }
169}
170
171static DCTELEM block[64] __attribute__ ((aligned (8)));
172static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 173static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc
FB
174
175void dct_error(const char *name, int is_idct,
176 void (*fdct_func)(DCTELEM *block),
3ac35bdb 177 void (*fdct_ref)(DCTELEM *block), int form, int test)
de6d9b64
FB
178{
179 int it, i, scale;
de6d9b64 180 int err_inf, v;
0c1a9eda
ZK
181 int64_t err2, ti, ti1, it1;
182 int64_t sysErr[64], sysErrMax=0;
86748dbc 183 int maxout=0;
86748dbc 184 int blockSumErrMax=0, blockSumErr;
de6d9b64
FB
185
186 srandom(0);
187
188 err_inf = 0;
189 err2 = 0;
86748dbc 190 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 191 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
192 for(i=0;i<64;i++)
193 block1[i] = 0;
194 switch(test){
115329f1 195 case 0:
86748dbc
MN
196 for(i=0;i<64;i++)
197 block1[i] = (random() % 512) -256;
ad324c93 198 if (is_idct){
86748dbc 199 fdct(block1);
ad324c93
MN
200
201 for(i=0;i<64;i++)
202 block1[i]>>=3;
203 }
86748dbc
MN
204 break;
205 case 1:{
206 int num= (random()%10)+1;
207 for(i=0;i<num;i++)
208 block1[random()%64] = (random() % 512) -256;
209 }break;
210 case 2:
211 block1[0]= (random()%4096)-2048;
212 block1[63]= (block1[0]&1)^1;
213 break;
214 }
9e1586fc 215
86748dbc
MN
216#if 0 // simulate mismatch control
217{ int sum=0;
218 for(i=0;i<64;i++)
219 sum+=block1[i];
220
115329f1 221 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
222}
223#endif
224
225 for(i=0; i<64; i++)
226 block_org[i]= block1[i];
9e1586fc 227
3ac35bdb 228 if (form == MMX_PERM) {
86748dbc 229 for(i=0;i<64;i++)
9e1586fc 230 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 231 } else if (form == MMX_SIMPLE_PERM) {
86748dbc
MN
232 for(i=0;i<64;i++)
233 block[idct_simple_mmx_perm[i]] = block1[i];
234
bb270c08 235 } else {
86748dbc
MN
236 for(i=0; i<64; i++)
237 block[i]= block1[i];
9e1586fc 238 }
86748dbc
MN
239#if 0 // simulate mismatch control for tested IDCT but not the ref
240{ int sum=0;
241 for(i=0;i<64;i++)
242 sum+=block[i];
243
115329f1 244 if((sum&1)==0) block[63]^=1;
86748dbc
MN
245}
246#endif
9e1586fc 247
de6d9b64 248 fdct_func(block);
19ef2ba5 249 emms_c(); /* for ff_mmx_idct */
9e1586fc 250
3ac35bdb 251 if (form == SCALE_PERM) {
de6d9b64 252 for(i=0; i<64; i++) {
ad324c93 253 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
86748dbc
MN
254 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
255 }
256 }
257
9e1586fc 258 fdct_ref(block1);
de6d9b64 259
86748dbc 260 blockSumErr=0;
de6d9b64
FB
261 for(i=0;i<64;i++) {
262 v = abs(block[i] - block1[i]);
263 if (v > err_inf)
264 err_inf = v;
265 err2 += v * v;
bb270c08
DB
266 sysErr[i] += block[i] - block1[i];
267 blockSumErr += v;
268 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 269 }
86748dbc
MN
270 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
271#if 0 // print different matrix pairs
272 if(blockSumErr){
273 printf("\n");
274 for(i=0; i<64; i++){
275 if((i&7)==0) printf("\n");
276 printf("%4d ", block_org[i]);
277 }
278 for(i=0; i<64; i++){
279 if((i&7)==0) printf("\n");
280 printf("%4d ", block[i] - block1[i]);
281 }
282 }
283#endif
284 }
c26abfa5 285 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
115329f1 286
86748dbc
MN
287#if 1 // dump systematic errors
288 for(i=0; i<64; i++){
bb270c08 289 if(i%8==0) printf("\n");
86748dbc 290 printf("%5d ", (int)sysErr[i]);
de6d9b64 291 }
86748dbc
MN
292 printf("\n");
293#endif
115329f1 294
86748dbc 295 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 296 is_idct ? "IDCT" : "DCT",
86748dbc
MN
297 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
298#if 1 //Speed test
de6d9b64 299 /* speed test */
86748dbc
MN
300 for(i=0;i<64;i++)
301 block1[i] = 0;
302 switch(test){
115329f1 303 case 0:
86748dbc
MN
304 for(i=0;i<64;i++)
305 block1[i] = (random() % 512) -256;
ad324c93 306 if (is_idct){
86748dbc 307 fdct(block1);
ad324c93
MN
308
309 for(i=0;i<64;i++)
310 block1[i]>>=3;
311 }
86748dbc
MN
312 break;
313 case 1:{
314 case 2:
315 block1[0] = (random() % 512) -256;
316 block1[1] = (random() % 512) -256;
317 block1[2] = (random() % 512) -256;
318 block1[3] = (random() % 512) -256;
319 }break;
320 }
de6d9b64 321
3ac35bdb 322 if (form == MMX_PERM) {
86748dbc 323 for(i=0;i<64;i++)
9e1586fc 324 block[idct_mmx_perm[i]] = block1[i];
3ac35bdb 325 } else if(form == MMX_SIMPLE_PERM) {
86748dbc
MN
326 for(i=0;i<64;i++)
327 block[idct_simple_mmx_perm[i]] = block1[i];
328 } else {
329 for(i=0; i<64; i++)
330 block[i]= block1[i];
9e1586fc
FB
331 }
332
de6d9b64
FB
333 ti = gettime();
334 it1 = 0;
335 do {
336 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
337 for(i=0; i<64; i++)
338 block[i]= block1[i];
339// memcpy(block, block1, sizeof(DCTELEM) * 64);
340// dont memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
341 fdct_func(block);
342 }
343 it1 += NB_ITS_SPEED;
344 ti1 = gettime() - ti;
345 } while (ti1 < 1000000);
19ef2ba5 346 emms_c();
de6d9b64 347
86748dbc 348 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 349 is_idct ? "IDCT" : "DCT",
de6d9b64 350 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 351#endif
de6d9b64
FB
352}
353
0c1a9eda
ZK
354static uint8_t img_dest[64] __attribute__ ((aligned (8)));
355static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
a46a3ce4 356
0c1a9eda 357void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
358{
359 static int init;
360 static double c8[8][8];
361 static double c4[4][4];
362 double block1[64], block2[64], block3[64];
363 double s, sum, v;
364 int i, j, k;
365
366 if (!init) {
367 init = 1;
368
369 for(i=0;i<8;i++) {
370 sum = 0;
371 for(j=0;j<8;j++) {
372 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
373 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
374 sum += c8[i][j] * c8[i][j];
375 }
376 }
115329f1 377
a46a3ce4
FB
378 for(i=0;i<4;i++) {
379 sum = 0;
380 for(j=0;j<4;j++) {
381 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
382 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
383 sum += c4[i][j] * c4[i][j];
384 }
385 }
386 }
387
388 /* butterfly */
652f0197 389 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
390 for(i=0;i<4;i++) {
391 for(j=0;j<8;j++) {
652f0197
FB
392 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
393 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
394 }
395 }
396
397 /* idct8 on lines */
398 for(i=0;i<8;i++) {
399 for(j=0;j<8;j++) {
400 sum = 0;
401 for(k=0;k<8;k++)
402 sum += c8[k][j] * block1[8*i+k];
403 block2[8*i+j] = sum;
404 }
405 }
406
407 /* idct4 */
408 for(i=0;i<8;i++) {
409 for(j=0;j<4;j++) {
410 /* top */
411 sum = 0;
412 for(k=0;k<4;k++)
413 sum += c4[k][j] * block2[8*(2*k)+i];
414 block3[8*(2*j)+i] = sum;
415
416 /* bottom */
417 sum = 0;
418 for(k=0;k<4;k++)
419 sum += c4[k][j] * block2[8*(2*k+1)+i];
420 block3[8*(2*j+1)+i] = sum;
421 }
422 }
423
424 /* clamp and store the result */
425 for(i=0;i<8;i++) {
426 for(j=0;j<8;j++) {
652f0197 427 v = block3[8*i+j];
a46a3ce4
FB
428 if (v < 0)
429 v = 0;
430 else if (v > 255)
431 v = 255;
432 dest[i * linesize + j] = (int)rint(v);
433 }
434 }
435}
436
115329f1 437void idct248_error(const char *name,
0c1a9eda 438 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
439{
440 int it, i, it1, ti, ti1, err_max, v;
441
442 srandom(0);
115329f1 443
a46a3ce4
FB
444 /* just one test to see if code is correct (precision is less
445 important here) */
446 err_max = 0;
447 for(it=0;it<NB_ITS;it++) {
115329f1 448
652f0197
FB
449 /* XXX: use forward transform to generate values */
450 for(i=0;i<64;i++)
451 block1[i] = (random() % 256) - 128;
452 block1[0] += 1024;
453
a46a3ce4
FB
454 for(i=0; i<64; i++)
455 block[i]= block1[i];
456 idct248_ref(img_dest1, 8, block);
115329f1 457
652f0197
FB
458 for(i=0; i<64; i++)
459 block[i]= block1[i];
460 idct248_put(img_dest, 8, block);
115329f1 461
652f0197
FB
462 for(i=0;i<64;i++) {
463 v = abs((int)img_dest[i] - (int)img_dest1[i]);
464 if (v == 255)
465 printf("%d %d\n", img_dest[i], img_dest1[i]);
466 if (v > err_max)
467 err_max = v;
468 }
a46a3ce4
FB
469#if 0
470 printf("ref=\n");
471 for(i=0;i<8;i++) {
472 int j;
473 for(j=0;j<8;j++) {
474 printf(" %3d", img_dest1[i*8+j]);
475 }
476 printf("\n");
477 }
115329f1 478
a46a3ce4
FB
479 printf("out=\n");
480 for(i=0;i<8;i++) {
481 int j;
482 for(j=0;j<8;j++) {
483 printf(" %3d", img_dest[i*8+j]);
484 }
485 printf("\n");
486 }
487#endif
a46a3ce4
FB
488 }
489 printf("%s %s: err_inf=%d\n",
490 1 ? "IDCT248" : "DCT248",
491 name, err_max);
492
493 ti = gettime();
494 it1 = 0;
495 do {
496 for(it=0;it<NB_ITS_SPEED;it++) {
497 for(i=0; i<64; i++)
498 block[i]= block1[i];
499// memcpy(block, block1, sizeof(DCTELEM) * 64);
500// dont memcpy especially not fastmemcpy because it does movntq !!!
501 idct248_put(img_dest, 8, block);
502 }
503 it1 += NB_ITS_SPEED;
504 ti1 = gettime() - ti;
505 } while (ti1 < 1000000);
19ef2ba5 506 emms_c();
a46a3ce4
FB
507
508 printf("%s %s: %0.1f kdct/s\n",
509 1 ? "IDCT248" : "DCT248",
510 name, (double)it1 * 1000.0 / (double)ti1);
511}
512
9e1586fc
FB
513void help(void)
514{
86748dbc
MN
515 printf("dct-test [-i] [<test-number>]\n"
516 "test-number 0 -> test with random matrixes\n"
517 " 1 -> test with random sparse matrixes\n"
518 " 2 -> do 3. test from mpeg4 std\n"
a46a3ce4
FB
519 "-i test IDCT implementations\n"
520 "-4 test IDCT248 implementations\n");
9e1586fc
FB
521}
522
de6d9b64
FB
523int main(int argc, char **argv)
524{
a46a3ce4 525 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
526 int c,i;
527 int test=1;
9e1586fc 528
de6d9b64 529 init_fdct();
9e1586fc 530 idct_mmx_init();
de6d9b64 531
486497e0 532 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 533 for(i=0;i<MAX_NEG_CROP;i++) {
486497e0
MR
534 cropTbl[i] = 0;
535 cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 536 }
115329f1 537
9e1586fc 538 for(;;) {
a46a3ce4 539 c = getopt(argc, argv, "ih4");
9e1586fc
FB
540 if (c == -1)
541 break;
542 switch(c) {
543 case 'i':
544 test_idct = 1;
545 break;
a46a3ce4
FB
546 case '4':
547 test_248_dct = 1;
548 break;
86748dbc 549 default :
9e1586fc
FB
550 case 'h':
551 help();
c6bdc908 552 return 0;
9e1586fc
FB
553 }
554 }
115329f1 555
86748dbc 556 if(optind <argc) test= atoi(argv[optind]);
115329f1 557
9e1586fc
FB
558 printf("ffmpeg DCT/IDCT test\n");
559
a46a3ce4
FB
560 if (test_248_dct) {
561 idct248_error("SIMPLE-C", simple_idct248_put);
9e1586fc 562 } else {
3ac35bdb
MH
563 for (i=0;algos[i].name;i++)
564 if (algos[i].is_idct == test_idct) {
565 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
a46a3ce4 566 }
9e1586fc 567 }
de6d9b64
FB
568 return 0;
569}