Commit | Line | Data |
---|---|---|
ff4ec49e | 1 | /* DCT test. (c) 2001 Fabrice Bellard. |
de6d9b64 FB |
2 | Started from sample code by Juan J. Sierralta P. |
3 | */ | |
4 | #include <stdlib.h> | |
5 | #include <stdio.h> | |
6 | #include <string.h> | |
7 | #include <sys/time.h> | |
8 | #include <unistd.h> | |
9e1586fc | 9 | #include <getopt.h> |
de6d9b64 FB |
10 | |
11 | #include "dsputil.h" | |
12 | ||
9e1586fc | 13 | #include "i386/mmx.h" |
86748dbc | 14 | #include "simple_idct.h" |
9e1586fc FB |
15 | |
16 | /* reference fdct/idct */ | |
de6d9b64 | 17 | extern void fdct(DCTELEM *block); |
9e1586fc | 18 | extern void idct(DCTELEM *block); |
de6d9b64 FB |
19 | extern void init_fdct(); |
20 | ||
9e1586fc FB |
21 | extern void j_rev_dct(DCTELEM *data); |
22 | extern void ff_mmx_idct(DCTELEM *data); | |
23 | extern void ff_mmxext_idct(DCTELEM *data); | |
24 | ||
86748dbc MN |
25 | extern void odivx_idct_c (short *block); |
26 | ||
de6d9b64 FB |
/* Shift applied in dct_error() when rescaling the output of fdct_ifast. */
#define AANSCALE_BITS 12

/*
 * Per-coefficient scale factors, one per position of the 8x8 block in
 * row-major order.  Precomputed values scaled up by 14 bits
 * (entry 0 is 16384 == 1 << 14).
 */
static const unsigned short aanscales[64] = {
    /* row 0 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 1 */ 22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    /* row 2 */ 21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    /* row 3 */ 19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    /* row 4 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 5 */ 12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    /* row 6 */  8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    /* row 7 */  4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
39 | ||
86748dbc MN |
40 | UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
41 | ||
de6d9b64 FB |
42 | INT64 gettime(void) |
43 | { | |
44 | struct timeval tv; | |
45 | gettimeofday(&tv,NULL); | |
46 | return (INT64)tv.tv_sec * 1000000 + tv.tv_usec; | |
47 | } | |
48 | ||
/* Number of generated blocks checked in the accuracy pass of dct_error(). */
#define NB_ITS 20000
/* Number of transform calls made between two clock reads in the speed pass. */
#define NB_ITS_SPEED 50000
51 | ||
9e1586fc FB |
/* Input permutation for the mmx/mmxext IDCT; computed by idct_mmx_init(). */
static short idct_mmx_perm[64];

/*
 * Input permutation for simple_idct_mmx: coefficient i of the natural-order
 * block is stored at index idct_simple_mmx_perm[i] before the call.
 */
static short idct_simple_mmx_perm[64]={
    0x00, 0x08, 0x04, 0x09,   0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19,   0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29,   0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B,   0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B,   0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39,   0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B,   0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B,   0x33, 0x3E, 0x37, 0x3F,
};
64 | ||
9e1586fc FB |
65 | void idct_mmx_init(void) |
66 | { | |
67 | int i; | |
68 | ||
69 | /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | |
70 | for (i = 0; i < 64; i++) { | |
71 | idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
86748dbc | 72 | // idct_simple_mmx_perm[i] = simple_block_permute_op(i); |
9e1586fc FB |
73 | } |
74 | } | |
75 | ||
76 | static DCTELEM block[64] __attribute__ ((aligned (8))); | |
77 | static DCTELEM block1[64] __attribute__ ((aligned (8))); | |
86748dbc | 78 | static DCTELEM block_org[64] __attribute__ ((aligned (8))); |
9e1586fc FB |
79 | |
80 | void dct_error(const char *name, int is_idct, | |
81 | void (*fdct_func)(DCTELEM *block), | |
86748dbc | 82 | void (*fdct_ref)(DCTELEM *block), int test) |
de6d9b64 FB |
83 | { |
84 | int it, i, scale; | |
de6d9b64 FB |
85 | int err_inf, v; |
86 | INT64 err2, ti, ti1, it1; | |
86748dbc MN |
87 | INT64 sysErr[64], sysErrMax=0; |
88 | int maxout=0; | |
89 | int max_sum=0; | |
90 | int blockSumErrMax=0, blockSumErr; | |
de6d9b64 FB |
91 | |
92 | srandom(0); | |
93 | ||
94 | err_inf = 0; | |
95 | err2 = 0; | |
86748dbc | 96 | for(i=0; i<64; i++) sysErr[i]=0; |
de6d9b64 | 97 | for(it=0;it<NB_ITS;it++) { |
86748dbc MN |
98 | for(i=0;i<64;i++) |
99 | block1[i] = 0; | |
100 | switch(test){ | |
101 | case 0: | |
102 | for(i=0;i<64;i++) | |
103 | block1[i] = (random() % 512) -256; | |
ad324c93 | 104 | if (is_idct){ |
86748dbc | 105 | fdct(block1); |
ad324c93 MN |
106 | |
107 | for(i=0;i<64;i++) | |
108 | block1[i]>>=3; | |
109 | } | |
86748dbc MN |
110 | break; |
111 | case 1:{ | |
112 | int num= (random()%10)+1; | |
113 | for(i=0;i<num;i++) | |
114 | block1[random()%64] = (random() % 512) -256; | |
115 | }break; | |
116 | case 2: | |
117 | block1[0]= (random()%4096)-2048; | |
118 | block1[63]= (block1[0]&1)^1; | |
119 | break; | |
120 | } | |
9e1586fc | 121 | |
86748dbc MN |
122 | #if 0 // simulate mismatch control |
123 | { int sum=0; | |
124 | for(i=0;i<64;i++) | |
125 | sum+=block1[i]; | |
126 | ||
127 | if((sum&1)==0) block1[63]^=1; | |
128 | } | |
129 | #endif | |
130 | ||
131 | for(i=0; i<64; i++) | |
132 | block_org[i]= block1[i]; | |
9e1586fc FB |
133 | |
134 | if (fdct_func == ff_mmx_idct || | |
86748dbc MN |
135 | fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { |
136 | for(i=0;i<64;i++) | |
9e1586fc | 137 | block[idct_mmx_perm[i]] = block1[i]; |
86748dbc MN |
138 | } else if(fdct_func == simple_idct_mmx ) { |
139 | for(i=0;i<64;i++) | |
140 | block[idct_simple_mmx_perm[i]] = block1[i]; | |
141 | ||
142 | } else { | |
143 | for(i=0; i<64; i++) | |
144 | block[i]= block1[i]; | |
9e1586fc | 145 | } |
86748dbc MN |
146 | #if 0 // simulate mismatch control for tested IDCT but not the ref |
147 | { int sum=0; | |
148 | for(i=0;i<64;i++) | |
149 | sum+=block[i]; | |
150 | ||
151 | if((sum&1)==0) block[63]^=1; | |
152 | } | |
153 | #endif | |
9e1586fc | 154 | |
de6d9b64 | 155 | fdct_func(block); |
9e1586fc FB |
156 | emms(); /* for ff_mmx_idct */ |
157 | ||
03c94ede | 158 | if (fdct_func == fdct_ifast) { |
de6d9b64 | 159 | for(i=0; i<64; i++) { |
ad324c93 | 160 | scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i]; |
86748dbc MN |
161 | block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; |
162 | } | |
163 | } | |
164 | ||
9e1586fc | 165 | fdct_ref(block1); |
de6d9b64 | 166 | |
86748dbc | 167 | blockSumErr=0; |
de6d9b64 FB |
168 | for(i=0;i<64;i++) { |
169 | v = abs(block[i] - block1[i]); | |
170 | if (v > err_inf) | |
171 | err_inf = v; | |
172 | err2 += v * v; | |
86748dbc MN |
173 | sysErr[i] += block[i] - block1[i]; |
174 | blockSumErr += v; | |
175 | if( abs(block[i])>maxout) maxout=abs(block[i]); | |
de6d9b64 | 176 | } |
86748dbc MN |
177 | if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; |
178 | #if 0 // print different matrix pairs | |
179 | if(blockSumErr){ | |
180 | printf("\n"); | |
181 | for(i=0; i<64; i++){ | |
182 | if((i&7)==0) printf("\n"); | |
183 | printf("%4d ", block_org[i]); | |
184 | } | |
185 | for(i=0; i<64; i++){ | |
186 | if((i&7)==0) printf("\n"); | |
187 | printf("%4d ", block[i] - block1[i]); | |
188 | } | |
189 | } | |
190 | #endif | |
191 | } | |
192 | for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i])); | |
193 | ||
194 | #if 1 // dump systematic errors | |
195 | for(i=0; i<64; i++){ | |
196 | if(i%8==0) printf("\n"); | |
197 | printf("%5d ", (int)sysErr[i]); | |
de6d9b64 | 198 | } |
86748dbc MN |
199 | printf("\n"); |
200 | #endif | |
201 | ||
202 | printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", | |
9e1586fc | 203 | is_idct ? "IDCT" : "DCT", |
86748dbc MN |
204 | name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); |
205 | #if 1 //Speed test | |
de6d9b64 | 206 | /* speed test */ |
86748dbc MN |
207 | for(i=0;i<64;i++) |
208 | block1[i] = 0; | |
209 | switch(test){ | |
210 | case 0: | |
211 | for(i=0;i<64;i++) | |
212 | block1[i] = (random() % 512) -256; | |
ad324c93 | 213 | if (is_idct){ |
86748dbc | 214 | fdct(block1); |
ad324c93 MN |
215 | |
216 | for(i=0;i<64;i++) | |
217 | block1[i]>>=3; | |
218 | } | |
86748dbc MN |
219 | break; |
220 | case 1:{ | |
221 | case 2: | |
222 | block1[0] = (random() % 512) -256; | |
223 | block1[1] = (random() % 512) -256; | |
224 | block1[2] = (random() % 512) -256; | |
225 | block1[3] = (random() % 512) -256; | |
226 | }break; | |
227 | } | |
de6d9b64 | 228 | |
9e1586fc | 229 | if (fdct_func == ff_mmx_idct || |
86748dbc MN |
230 | fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { |
231 | for(i=0;i<64;i++) | |
9e1586fc | 232 | block[idct_mmx_perm[i]] = block1[i]; |
86748dbc MN |
233 | } else if(fdct_func == simple_idct_mmx ) { |
234 | for(i=0;i<64;i++) | |
235 | block[idct_simple_mmx_perm[i]] = block1[i]; | |
236 | } else { | |
237 | for(i=0; i<64; i++) | |
238 | block[i]= block1[i]; | |
9e1586fc FB |
239 | } |
240 | ||
de6d9b64 FB |
241 | ti = gettime(); |
242 | it1 = 0; | |
243 | do { | |
244 | for(it=0;it<NB_ITS_SPEED;it++) { | |
86748dbc MN |
245 | for(i=0; i<64; i++) |
246 | block[i]= block1[i]; | |
247 | // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
248 | // dont memcpy especially not fastmemcpy because it does movntq !!! | |
de6d9b64 FB |
249 | fdct_func(block); |
250 | } | |
251 | it1 += NB_ITS_SPEED; | |
252 | ti1 = gettime() - ti; | |
253 | } while (ti1 < 1000000); | |
9e1586fc | 254 | emms(); |
de6d9b64 | 255 | |
86748dbc | 256 | printf("%s %s: %0.1f kdct/s\n", |
9e1586fc | 257 | is_idct ? "IDCT" : "DCT", |
de6d9b64 | 258 | name, (double)it1 * 1000.0 / (double)ti1); |
86748dbc | 259 | #endif |
de6d9b64 FB |
260 | } |
261 | ||
9e1586fc FB |
/* Write the usage text to stdout and terminate the program with status 1. */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n";

    /* the text contains no conversion specifications, so it is written as-is */
    fputs(usage, stdout);
    exit(1);
}
271 | ||
de6d9b64 FB |
272 | int main(int argc, char **argv) |
273 | { | |
9e1586fc | 274 | int test_idct = 0; |
86748dbc MN |
275 | int c,i; |
276 | int test=1; | |
9e1586fc | 277 | |
de6d9b64 | 278 | init_fdct(); |
9e1586fc | 279 | idct_mmx_init(); |
de6d9b64 | 280 | |
86748dbc MN |
281 | for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
282 | for(i=0;i<MAX_NEG_CROP;i++) { | |
283 | cropTbl[i] = 0; | |
284 | cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
285 | } | |
286 | ||
9e1586fc FB |
287 | for(;;) { |
288 | c = getopt(argc, argv, "ih"); | |
289 | if (c == -1) | |
290 | break; | |
291 | switch(c) { | |
292 | case 'i': | |
293 | test_idct = 1; | |
294 | break; | |
86748dbc | 295 | default : |
9e1586fc FB |
296 | case 'h': |
297 | help(); | |
298 | break; | |
299 | } | |
300 | } | |
86748dbc MN |
301 | |
302 | if(optind <argc) test= atoi(argv[optind]); | |
9e1586fc FB |
303 | |
304 | printf("ffmpeg DCT/IDCT test\n"); | |
305 | ||
306 | if (!test_idct) { | |
86748dbc MN |
307 | dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */ |
308 | dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test); | |
309 | dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test); | |
3f09f52a | 310 | dct_error("MMX", 0, ff_fdct_mmx, fdct, test); |
9e1586fc | 311 | } else { |
86748dbc MN |
312 | dct_error("REF-DBL", 1, idct, idct, test); |
313 | dct_error("INT", 1, j_rev_dct, idct, test); | |
314 | dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test); | |
315 | dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test); | |
316 | dct_error("SIMPLE-C", 1, simple_idct, idct, test); | |
317 | dct_error("SIMPLE-MMX", 1, simple_idct_mmx, idct, test); | |
318 | // dct_error("ODIVX-C", 1, odivx_idct_c, idct); | |
319 | //printf(" test against odivx idct\n"); | |
320 | // dct_error("REF", 1, idct, odivx_idct_c); | |
321 | // dct_error("INT", 1, j_rev_dct, odivx_idct_c); | |
322 | // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); | |
323 | // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); | |
324 | // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c); | |
325 | // dct_error("SIMPLE-MMX", 1, simple_idct_mmx, odivx_idct_c); | |
326 | // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c); | |
9e1586fc | 327 | } |
de6d9b64 FB |
328 | return 0; |
329 | } |