move some CFLAGS settings away from config.* writing section
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
/*
 * (c) 2001 Fabrice Bellard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
983e3246
MN
/**
 * @file dct-test.c
 * DCT test. (c) 2001 Fabrice Bellard.
 * Started from sample code by Juan J. Sierralta P.
 */
26
de6d9b64
FB
27#include <stdlib.h>
28#include <stdio.h>
29#include <string.h>
30#include <sys/time.h>
31#include <unistd.h>
32
33#include "dsputil.h"
34
86748dbc 35#include "simple_idct.h"
65e4c8c9 36#include "faandct.h"
9e1586fc 37
e366e679
FB
#ifndef MAX
/* Larger of two values. Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
41
434df899
MN
42#undef printf
43
/**
 * Definition of fast_memcpy as a thin wrapper around the standard memcpy().
 *
 * @param a destination buffer
 * @param b source buffer (must not overlap the destination)
 * @param c number of bytes to copy
 * @return a, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
45
9e1586fc 46/* reference fdct/idct */
de6d9b64 47extern void fdct(DCTELEM *block);
9e1586fc 48extern void idct(DCTELEM *block);
434df899
MN
49extern void ff_idct_xvid_mmx(DCTELEM *block);
50extern void ff_idct_xvid_mmx2(DCTELEM *block);
de6d9b64
FB
51extern void init_fdct();
52
9e1586fc
FB
53extern void j_rev_dct(DCTELEM *data);
54extern void ff_mmx_idct(DCTELEM *data);
55extern void ff_mmxext_idct(DCTELEM *data);
56
86748dbc
MN
57extern void odivx_idct_c (short *block);
58
de6d9b64
FB
/* Shift applied in dct_error() when undoing the AAN scaling of the
 * fdct_ifast (and, without FAAN_POSTSCALE, ff_faandct) output. */
#define AANSCALE_BITS 12

/* Per-coefficient AAN scale factors in row-major 8x8 order.
 * Note that the table itself is scaled by 14 bits (16384 == 1.0), not by
 * AANSCALE_BITS; dct_error() accounts for this when it derives its
 * per-coefficient multiplier. */
static const unsigned short aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
71
be6ed6ff 72uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
86748dbc 73
/**
 * Read the current time of day via gettimeofday().
 *
 * @return seconds * 1000000 + microseconds, as a 64-bit value
 */
int64_t gettime(void)
{
    struct timeval tv;
    gettimeofday(&tv,NULL);
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}
80
/* number of random blocks used for the accuracy measurement */
#define NB_ITS 20000
/* number of transform calls per timing batch in the speed test */
#define NB_ITS_SPEED 50000

/* input coefficient permutation for the mmx/mmxext idct and j_rev_dct;
 * computed at startup by idct_mmx_init() */
static short idct_mmx_perm[64];

/* input coefficient permutation for ff_simple_idct_mmx:
 * entry i is the destination index of source coefficient i */
static short idct_simple_mmx_perm[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
96
9e1586fc
FB
97void idct_mmx_init(void)
98{
99 int i;
100
101 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
102 for (i = 0; i < 64; i++) {
bb270c08
DB
103 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
104// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
9e1586fc
FB
105 }
106}
107
108static DCTELEM block[64] __attribute__ ((aligned (8)));
109static DCTELEM block1[64] __attribute__ ((aligned (8)));
86748dbc 110static DCTELEM block_org[64] __attribute__ ((aligned (8)));
9e1586fc
FB
111
112void dct_error(const char *name, int is_idct,
113 void (*fdct_func)(DCTELEM *block),
86748dbc 114 void (*fdct_ref)(DCTELEM *block), int test)
de6d9b64
FB
115{
116 int it, i, scale;
de6d9b64 117 int err_inf, v;
0c1a9eda
ZK
118 int64_t err2, ti, ti1, it1;
119 int64_t sysErr[64], sysErrMax=0;
86748dbc 120 int maxout=0;
86748dbc 121 int blockSumErrMax=0, blockSumErr;
de6d9b64
FB
122
123 srandom(0);
124
125 err_inf = 0;
126 err2 = 0;
86748dbc 127 for(i=0; i<64; i++) sysErr[i]=0;
de6d9b64 128 for(it=0;it<NB_ITS;it++) {
86748dbc
MN
129 for(i=0;i<64;i++)
130 block1[i] = 0;
131 switch(test){
115329f1 132 case 0:
86748dbc
MN
133 for(i=0;i<64;i++)
134 block1[i] = (random() % 512) -256;
ad324c93 135 if (is_idct){
86748dbc 136 fdct(block1);
ad324c93
MN
137
138 for(i=0;i<64;i++)
139 block1[i]>>=3;
140 }
86748dbc
MN
141 break;
142 case 1:{
143 int num= (random()%10)+1;
144 for(i=0;i<num;i++)
145 block1[random()%64] = (random() % 512) -256;
146 }break;
147 case 2:
148 block1[0]= (random()%4096)-2048;
149 block1[63]= (block1[0]&1)^1;
150 break;
151 }
9e1586fc 152
86748dbc
MN
153#if 0 // simulate mismatch control
154{ int sum=0;
155 for(i=0;i<64;i++)
156 sum+=block1[i];
157
115329f1 158 if((sum&1)==0) block1[63]^=1;
86748dbc
MN
159}
160#endif
161
162 for(i=0; i<64; i++)
163 block_org[i]= block1[i];
9e1586fc
FB
164
165 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
166 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
167 for(i=0;i<64;i++)
9e1586fc 168 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 169 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
170 for(i=0;i<64;i++)
171 block[idct_simple_mmx_perm[i]] = block1[i];
172
bb270c08 173 } else {
86748dbc
MN
174 for(i=0; i<64; i++)
175 block[i]= block1[i];
9e1586fc 176 }
86748dbc
MN
177#if 0 // simulate mismatch control for tested IDCT but not the ref
178{ int sum=0;
179 for(i=0;i<64;i++)
180 sum+=block[i];
181
115329f1 182 if((sum&1)==0) block[63]^=1;
86748dbc
MN
183}
184#endif
9e1586fc 185
de6d9b64 186 fdct_func(block);
9e1586fc
FB
187 emms(); /* for ff_mmx_idct */
188
115329f1
DB
189 if (fdct_func == fdct_ifast
190#ifndef FAAN_POSTSCALE
b4c3816c
MN
191 || fdct_func == ff_faandct
192#endif
193 ) {
de6d9b64 194 for(i=0; i<64; i++) {
ad324c93 195 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
86748dbc
MN
196 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
197 }
198 }
199
9e1586fc 200 fdct_ref(block1);
de6d9b64 201
86748dbc 202 blockSumErr=0;
de6d9b64
FB
203 for(i=0;i<64;i++) {
204 v = abs(block[i] - block1[i]);
205 if (v > err_inf)
206 err_inf = v;
207 err2 += v * v;
bb270c08
DB
208 sysErr[i] += block[i] - block1[i];
209 blockSumErr += v;
210 if( abs(block[i])>maxout) maxout=abs(block[i]);
de6d9b64 211 }
86748dbc
MN
212 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
213#if 0 // print different matrix pairs
214 if(blockSumErr){
215 printf("\n");
216 for(i=0; i<64; i++){
217 if((i&7)==0) printf("\n");
218 printf("%4d ", block_org[i]);
219 }
220 for(i=0; i<64; i++){
221 if((i&7)==0) printf("\n");
222 printf("%4d ", block[i] - block1[i]);
223 }
224 }
225#endif
226 }
c26abfa5 227 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
115329f1 228
86748dbc
MN
229#if 1 // dump systematic errors
230 for(i=0; i<64; i++){
bb270c08 231 if(i%8==0) printf("\n");
86748dbc 232 printf("%5d ", (int)sysErr[i]);
de6d9b64 233 }
86748dbc
MN
234 printf("\n");
235#endif
115329f1 236
86748dbc 237 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
9e1586fc 238 is_idct ? "IDCT" : "DCT",
86748dbc
MN
239 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
240#if 1 //Speed test
de6d9b64 241 /* speed test */
86748dbc
MN
242 for(i=0;i<64;i++)
243 block1[i] = 0;
244 switch(test){
115329f1 245 case 0:
86748dbc
MN
246 for(i=0;i<64;i++)
247 block1[i] = (random() % 512) -256;
ad324c93 248 if (is_idct){
86748dbc 249 fdct(block1);
ad324c93
MN
250
251 for(i=0;i<64;i++)
252 block1[i]>>=3;
253 }
86748dbc
MN
254 break;
255 case 1:{
256 case 2:
257 block1[0] = (random() % 512) -256;
258 block1[1] = (random() % 512) -256;
259 block1[2] = (random() % 512) -256;
260 block1[3] = (random() % 512) -256;
261 }break;
262 }
de6d9b64 263
9e1586fc 264 if (fdct_func == ff_mmx_idct ||
86748dbc
MN
265 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
266 for(i=0;i<64;i++)
9e1586fc 267 block[idct_mmx_perm[i]] = block1[i];
a46a3ce4 268 } else if(fdct_func == ff_simple_idct_mmx ) {
86748dbc
MN
269 for(i=0;i<64;i++)
270 block[idct_simple_mmx_perm[i]] = block1[i];
271 } else {
272 for(i=0; i<64; i++)
273 block[i]= block1[i];
9e1586fc
FB
274 }
275
de6d9b64
FB
276 ti = gettime();
277 it1 = 0;
278 do {
279 for(it=0;it<NB_ITS_SPEED;it++) {
86748dbc
MN
280 for(i=0; i<64; i++)
281 block[i]= block1[i];
282// memcpy(block, block1, sizeof(DCTELEM) * 64);
283// dont memcpy especially not fastmemcpy because it does movntq !!!
de6d9b64
FB
284 fdct_func(block);
285 }
286 it1 += NB_ITS_SPEED;
287 ti1 = gettime() - ti;
288 } while (ti1 < 1000000);
9e1586fc 289 emms();
de6d9b64 290
86748dbc 291 printf("%s %s: %0.1f kdct/s\n",
9e1586fc 292 is_idct ? "IDCT" : "DCT",
de6d9b64 293 name, (double)it1 * 1000.0 / (double)ti1);
86748dbc 294#endif
de6d9b64
FB
295}
296
0c1a9eda
ZK
/* 8x8 output of the idct248 implementation under test */
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
/* 8x8 output of idct248_ref() for the same input */
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
a46a3ce4 299
0c1a9eda 300void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
301{
302 static int init;
303 static double c8[8][8];
304 static double c4[4][4];
305 double block1[64], block2[64], block3[64];
306 double s, sum, v;
307 int i, j, k;
308
309 if (!init) {
310 init = 1;
311
312 for(i=0;i<8;i++) {
313 sum = 0;
314 for(j=0;j<8;j++) {
315 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
316 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
317 sum += c8[i][j] * c8[i][j];
318 }
319 }
115329f1 320
a46a3ce4
FB
321 for(i=0;i<4;i++) {
322 sum = 0;
323 for(j=0;j<4;j++) {
324 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
325 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
326 sum += c4[i][j] * c4[i][j];
327 }
328 }
329 }
330
331 /* butterfly */
652f0197 332 s = 0.5 * sqrt(2.0);
a46a3ce4
FB
333 for(i=0;i<4;i++) {
334 for(j=0;j<8;j++) {
652f0197
FB
335 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
336 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
a46a3ce4
FB
337 }
338 }
339
340 /* idct8 on lines */
341 for(i=0;i<8;i++) {
342 for(j=0;j<8;j++) {
343 sum = 0;
344 for(k=0;k<8;k++)
345 sum += c8[k][j] * block1[8*i+k];
346 block2[8*i+j] = sum;
347 }
348 }
349
350 /* idct4 */
351 for(i=0;i<8;i++) {
352 for(j=0;j<4;j++) {
353 /* top */
354 sum = 0;
355 for(k=0;k<4;k++)
356 sum += c4[k][j] * block2[8*(2*k)+i];
357 block3[8*(2*j)+i] = sum;
358
359 /* bottom */
360 sum = 0;
361 for(k=0;k<4;k++)
362 sum += c4[k][j] * block2[8*(2*k+1)+i];
363 block3[8*(2*j+1)+i] = sum;
364 }
365 }
366
367 /* clamp and store the result */
368 for(i=0;i<8;i++) {
369 for(j=0;j<8;j++) {
652f0197 370 v = block3[8*i+j];
a46a3ce4
FB
371 if (v < 0)
372 v = 0;
373 else if (v > 255)
374 v = 255;
375 dest[i * linesize + j] = (int)rint(v);
376 }
377 }
378}
379
115329f1 380void idct248_error(const char *name,
0c1a9eda 381 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
a46a3ce4
FB
382{
383 int it, i, it1, ti, ti1, err_max, v;
384
385 srandom(0);
115329f1 386
a46a3ce4
FB
387 /* just one test to see if code is correct (precision is less
388 important here) */
389 err_max = 0;
390 for(it=0;it<NB_ITS;it++) {
115329f1 391
652f0197
FB
392 /* XXX: use forward transform to generate values */
393 for(i=0;i<64;i++)
394 block1[i] = (random() % 256) - 128;
395 block1[0] += 1024;
396
a46a3ce4
FB
397 for(i=0; i<64; i++)
398 block[i]= block1[i];
399 idct248_ref(img_dest1, 8, block);
115329f1 400
652f0197
FB
401 for(i=0; i<64; i++)
402 block[i]= block1[i];
403 idct248_put(img_dest, 8, block);
115329f1 404
652f0197
FB
405 for(i=0;i<64;i++) {
406 v = abs((int)img_dest[i] - (int)img_dest1[i]);
407 if (v == 255)
408 printf("%d %d\n", img_dest[i], img_dest1[i]);
409 if (v > err_max)
410 err_max = v;
411 }
a46a3ce4
FB
412#if 0
413 printf("ref=\n");
414 for(i=0;i<8;i++) {
415 int j;
416 for(j=0;j<8;j++) {
417 printf(" %3d", img_dest1[i*8+j]);
418 }
419 printf("\n");
420 }
115329f1 421
a46a3ce4
FB
422 printf("out=\n");
423 for(i=0;i<8;i++) {
424 int j;
425 for(j=0;j<8;j++) {
426 printf(" %3d", img_dest[i*8+j]);
427 }
428 printf("\n");
429 }
430#endif
a46a3ce4
FB
431 }
432 printf("%s %s: err_inf=%d\n",
433 1 ? "IDCT248" : "DCT248",
434 name, err_max);
435
436 ti = gettime();
437 it1 = 0;
438 do {
439 for(it=0;it<NB_ITS_SPEED;it++) {
440 for(i=0; i<64; i++)
441 block[i]= block1[i];
442// memcpy(block, block1, sizeof(DCTELEM) * 64);
443// dont memcpy especially not fastmemcpy because it does movntq !!!
444 idct248_put(img_dest, 8, block);
445 }
446 it1 += NB_ITS_SPEED;
447 ti1 = gettime() - ti;
448 } while (ti1 < 1000000);
449 emms();
450
451 printf("%s %s: %0.1f kdct/s\n",
452 1 ? "IDCT248" : "DCT248",
453 name, (double)it1 * 1000.0 / (double)ti1);
454}
455
9e1586fc
FB
/**
 * Print the command-line usage text to stdout and terminate the program
 * with exit status 1. Does not return.
 */
void help(void)
{
    printf("dct-test [-i] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n");
    exit(1);
}
466
de6d9b64
FB
467int main(int argc, char **argv)
468{
a46a3ce4 469 int test_idct = 0, test_248_dct = 0;
86748dbc
MN
470 int c,i;
471 int test=1;
9e1586fc 472
de6d9b64 473 init_fdct();
9e1586fc 474 idct_mmx_init();
de6d9b64 475
be6ed6ff 476 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
86748dbc 477 for(i=0;i<MAX_NEG_CROP;i++) {
be6ed6ff
MR
478 ff_cropTbl[i] = 0;
479 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
86748dbc 480 }
115329f1 481
9e1586fc 482 for(;;) {
a46a3ce4 483 c = getopt(argc, argv, "ih4");
9e1586fc
FB
484 if (c == -1)
485 break;
486 switch(c) {
487 case 'i':
488 test_idct = 1;
489 break;
a46a3ce4
FB
490 case '4':
491 test_248_dct = 1;
492 break;
86748dbc 493 default :
9e1586fc
FB
494 case 'h':
495 help();
496 break;
497 }
498 }
115329f1 499
86748dbc 500 if(optind <argc) test= atoi(argv[optind]);
115329f1 501
9e1586fc
FB
502 printf("ffmpeg DCT/IDCT test\n");
503
a46a3ce4
FB
504 if (test_248_dct) {
505 idct248_error("SIMPLE-C", simple_idct248_put);
9e1586fc 506 } else {
a46a3ce4
FB
507 if (!test_idct) {
508 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
509 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
510 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
511 dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
94789b9e 512 dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
65e4c8c9 513 dct_error("FAAN", 0, ff_faandct, fdct, test);
a46a3ce4
FB
514 } else {
515 dct_error("REF-DBL", 1, idct, idct, test);
516 dct_error("INT", 1, j_rev_dct, idct, test);
517 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
518 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
519 dct_error("SIMPLE-C", 1, simple_idct, idct, test);
520 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
434df899
MN
521 dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
522 dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
a46a3ce4
FB
523 // dct_error("ODIVX-C", 1, odivx_idct_c, idct);
524 //printf(" test against odivx idct\n");
bb270c08 525 // dct_error("REF", 1, idct, odivx_idct_c);
a46a3ce4
FB
526 // dct_error("INT", 1, j_rev_dct, odivx_idct_c);
527 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
528 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
529 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c);
530 // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c);
531 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c);
532 }
9e1586fc 533 }
de6d9b64
FB
534 return 0;
535}