Commit | Line | Data |
---|---|---|
ff4ec49e | 1 | /* DCT test. (c) 2001 Fabrice Bellard. |
de6d9b64 FB |
2 | Started from sample code by Juan J. Sierralta P. |
3 | */ | |
4 | #include <stdlib.h> | |
5 | #include <stdio.h> | |
6 | #include <string.h> | |
7 | #include <sys/time.h> | |
8 | #include <unistd.h> | |
9e1586fc | 9 | #include <getopt.h> |
de6d9b64 FB |
10 | |
11 | #include "dsputil.h" | |
12 | ||
9e1586fc FB |
13 | #include "i386/mmx.h" |
14 | ||
15 | /* reference fdct/idct */ | |
de6d9b64 | 16 | extern void fdct(DCTELEM *block); |
9e1586fc | 17 | extern void idct(DCTELEM *block); |
de6d9b64 FB |
18 | extern void init_fdct(); |
19 | ||
9e1586fc FB |
20 | extern void j_rev_dct(DCTELEM *data); |
21 | extern void ff_mmx_idct(DCTELEM *data); | |
22 | extern void ff_mmxext_idct(DCTELEM *data); | |
23 | ||
de6d9b64 FB |
/* Right-shift applied by dct_error() when it descales fdct_ifast output. */
#define AANSCALE_BITS 12

/*
 * Per-coefficient scale factors, one per entry of an 8x8 block.
 * Precomputed values scaled up by 14 bits (so 16384 stands for 1.0).
 */
static const unsigned short aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
36 | ||
37 | INT64 gettime(void) | |
38 | { | |
39 | struct timeval tv; | |
40 | gettimeofday(&tv,NULL); | |
41 | return (INT64)tv.tv_sec * 1000000 + tv.tv_usec; | |
42 | } | |
43 | ||
/* Iteration counts used by dct_error(). */
enum {
    NB_ITS       = 20000,   /* random blocks checked in the accuracy pass */
    NB_ITS_SPEED = 50000    /* transforms per batch in the speed pass */
};
46 | ||
9e1586fc FB |
/*
 * Input reordering for the mmx/mmxext idct: coefficient i of a block in
 * natural order is stored at index idct_mmx_perm[i].
 * Filled in by idct_mmx_init().
 */
static short idct_mmx_perm[64];

/*
 * Build idct_mmx_perm[].  Bits 3..5 of the index are kept as they are;
 * the low three bits are rotated right by one (bits 1..2 move down to
 * bits 0..1, bit 0 moves up to bit 2).
 */
void idct_mmx_init(void)
{
    short *dst = idct_mmx_perm;
    int idx;

    for (idx = 0; idx < 64; idx++) {
        int high = idx & 0x38;
        int low3 = idx & 7;

        *dst++ = high | (low3 >> 1) | ((low3 & 1) << 2);
    }
}
58 | ||
59 | static DCTELEM block[64] __attribute__ ((aligned (8))); | |
60 | static DCTELEM block1[64] __attribute__ ((aligned (8))); | |
61 | ||
62 | void dct_error(const char *name, int is_idct, | |
63 | void (*fdct_func)(DCTELEM *block), | |
64 | void (*fdct_ref)(DCTELEM *block)) | |
de6d9b64 FB |
65 | { |
66 | int it, i, scale; | |
de6d9b64 FB |
67 | int err_inf, v; |
68 | INT64 err2, ti, ti1, it1; | |
69 | ||
70 | srandom(0); | |
71 | ||
72 | err_inf = 0; | |
73 | err2 = 0; | |
74 | for(it=0;it<NB_ITS;it++) { | |
75 | for(i=0;i<64;i++) | |
76 | block1[i] = random() % 256; | |
9e1586fc FB |
77 | |
78 | /* for idct test, generate inverse idct data */ | |
79 | if (is_idct) | |
80 | fdct(block1); | |
81 | ||
82 | if (fdct_func == ff_mmx_idct || | |
83 | fdct_func == j_rev_dct) { | |
84 | for(i=0;i<64;i++) | |
85 | block[idct_mmx_perm[i]] = block1[i]; | |
86 | } else { | |
87 | memcpy(block, block1, sizeof(DCTELEM) * 64); | |
88 | } | |
89 | ||
de6d9b64 | 90 | fdct_func(block); |
9e1586fc FB |
91 | emms(); /* for ff_mmx_idct */ |
92 | ||
03c94ede | 93 | if (fdct_func == fdct_ifast) { |
de6d9b64 FB |
94 | for(i=0; i<64; i++) { |
95 | scale = (1 << (AANSCALE_BITS + 11)) / aanscales[i]; | |
96 | block[i] = (block[i] * scale) >> AANSCALE_BITS; | |
97 | } | |
98 | } | |
99 | ||
9e1586fc | 100 | fdct_ref(block1); |
de6d9b64 FB |
101 | |
102 | for(i=0;i<64;i++) { | |
103 | v = abs(block[i] - block1[i]); | |
104 | if (v > err_inf) | |
105 | err_inf = v; | |
106 | err2 += v * v; | |
107 | } | |
108 | } | |
9e1586fc FB |
109 | printf("%s %s: err_inf=%d err2=%0.2f\n", |
110 | is_idct ? "IDCT" : "DCT", | |
de6d9b64 FB |
111 | name, err_inf, (double)err2 / NB_ITS / 64.0); |
112 | ||
113 | /* speed test */ | |
114 | for(i=0;i<64;i++) | |
115 | block1[i] = 255 - 63 + i; | |
116 | ||
9e1586fc FB |
117 | /* for idct test, generate inverse idct data */ |
118 | if (is_idct) | |
119 | fdct(block1); | |
120 | if (fdct_func == ff_mmx_idct || | |
121 | fdct_func == j_rev_dct) { | |
122 | for(i=0;i<64;i++) | |
123 | block[idct_mmx_perm[i]] = block1[i]; | |
124 | } | |
125 | ||
de6d9b64 FB |
126 | ti = gettime(); |
127 | it1 = 0; | |
128 | do { | |
129 | for(it=0;it<NB_ITS_SPEED;it++) { | |
130 | memcpy(block, block1, sizeof(DCTELEM) * 64); | |
131 | fdct_func(block); | |
132 | } | |
133 | it1 += NB_ITS_SPEED; | |
134 | ti1 = gettime() - ti; | |
135 | } while (ti1 < 1000000); | |
9e1586fc | 136 | emms(); |
de6d9b64 | 137 | |
9e1586fc FB |
138 | printf("%s %s: %0.1f kdct/s\n", |
139 | is_idct ? "IDCT" : "DCT", | |
de6d9b64 FB |
140 | name, (double)it1 * 1000.0 / (double)ti1); |
141 | } | |
142 | ||
9e1586fc FB |
/* Print the usage text on stdout and terminate with exit status 1. */
void help(void)
{
    static const char usage[] =
        "dct-test [-i]\n"
        "test DCT implementations\n";

    fputs(usage, stdout);
    exit(1);
}
149 | ||
de6d9b64 FB |
150 | int main(int argc, char **argv) |
151 | { | |
9e1586fc FB |
152 | int test_idct = 0; |
153 | int c; | |
154 | ||
de6d9b64 | 155 | init_fdct(); |
9e1586fc | 156 | idct_mmx_init(); |
de6d9b64 | 157 | |
9e1586fc FB |
158 | for(;;) { |
159 | c = getopt(argc, argv, "ih"); | |
160 | if (c == -1) | |
161 | break; | |
162 | switch(c) { | |
163 | case 'i': | |
164 | test_idct = 1; | |
165 | break; | |
166 | case 'h': | |
167 | help(); | |
168 | break; | |
169 | } | |
170 | } | |
171 | ||
172 | printf("ffmpeg DCT/IDCT test\n"); | |
173 | ||
174 | if (!test_idct) { | |
175 | dct_error("REF", 0, fdct, fdct); /* only to verify code ! */ | |
03c94ede | 176 | dct_error("AAN", 0, fdct_ifast, fdct); |
9e1586fc FB |
177 | dct_error("MMX", 0, fdct_mmx, fdct); |
178 | } else { | |
179 | dct_error("REF", 1, idct, idct); | |
180 | dct_error("INT", 1, j_rev_dct, idct); | |
181 | dct_error("MMX", 1, ff_mmx_idct, idct); | |
182 | // dct_error("MMX", 1, ff_mmxext_idct, idct); | |
183 | } | |
de6d9b64 FB |
184 | return 0; |
185 | } |