Revert "tiff: support reading gray+alpha at 8 bits"
[libav.git] / libavcodec / dct-test.c
CommitLineData
04d7f601
DB
1/*
2 * (c) 2001 Fabrice Bellard
3ac35bdb 3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
04d7f601 4 *
2912e87a 5 * This file is part of Libav.
b78e7197 6 *
2912e87a 7 * Libav is free software; you can redistribute it and/or
04d7f601
DB
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
b78e7197 10 * version 2.1 of the License, or (at your option) any later version.
04d7f601 11 *
2912e87a 12 * Libav is distributed in the hope that it will be useful,
04d7f601
DB
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
2912e87a 18 * License along with Libav; if not, write to the Free Software
04d7f601
DB
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
983e3246 22/**
ba87f080 23 * @file
94f694a4 24 * DCT test (c) 2001 Fabrice Bellard
983e3246
MN
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
667fb97a 28#include "config.h"
de6d9b64
FB
29#include <stdlib.h>
30#include <stdio.h>
31#include <string.h>
667fb97a 32#if HAVE_UNISTD_H
de6d9b64 33#include <unistd.h>
667fb97a 34#endif
12807c8d 35#include <math.h>
de6d9b64 36
c6c98d08 37#include "libavutil/cpu.h"
ae32e509 38#include "libavutil/common.h"
294eaa26 39#include "libavutil/lfg.h"
980f81d9 40#include "libavutil/time.h"
de6d9b64 41
5d3d39c7 42#include "dct.h"
e0a2e60c 43#include "idctdsp.h"
86748dbc 44#include "simple_idct.h"
10ac3618 45#include "aandcttab.h"
65e4c8c9 46#include "faandct.h"
6f08c541 47#include "faanidct.h"
4de8b606 48#include "arm/idct.h"
1a583c0c 49#include "ppc/fdct.h"
85cabb8d 50#include "x86/fdct.h"
a6493a8f 51#include "x86/idct_xvid.h"
5dcc2015 52#include "x86/simple_idct.h"
6a813295 53#include "dctref.h"
9e1586fc 54
3ac35bdb 55struct algo {
36fa9ef3 56 const char *name;
88bd7fdc 57 void (*func)(int16_t *block);
e0a2e60c 58 enum idct_permutation_type perm_type;
746ad4e0 59 int cpu_flag;
dbf396d4 60 int nonspec;
3ac35bdb
MH
61};
62
4b357756 63static const struct algo fdct_tab[] = {
e0a2e60c
DB
64 { "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE },
65 { "FAAN", ff_faandct, FF_IDCT_PERM_NONE },
66 { "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE },
67 { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
3ac35bdb 68
17337f54 69#if HAVE_MMX_INLINE
e0a2e60c 70 { "MMX", ff_fdct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
0b8b2ae5
DB
71#endif
72#if HAVE_MMXEXT_INLINE
e0a2e60c 73 { "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT },
0b8b2ae5
DB
74#endif
75#if HAVE_SSE2_INLINE
e0a2e60c 76 { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 },
94254fc0 77#endif
3ac35bdb 78
4b357756 79#if HAVE_ALTIVEC
e0a2e60c 80 { "altivecfdct", ff_fdct_altivec, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ALTIVEC },
4b357756
MR
81#endif
82
4b357756
MR
83 { 0 }
84};
85
86static const struct algo idct_tab[] = {
e0a2e60c
DB
87 { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE },
88 { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE },
89 { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 },
90 { "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE },
4b357756 91
17337f54 92#if HAVE_MMX_INLINE
e0a2e60c
DB
93 { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
94 { "XVID-MMX", ff_idct_xvid_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX, 1 },
0b8b2ae5
DB
95#endif
96#if HAVE_MMXEXT_INLINE
e0a2e60c 97 { "XVID-MMXEXT", ff_idct_xvid_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 },
0b8b2ae5
DB
98#endif
99#if HAVE_SSE2_INLINE
e0a2e60c 100 { "XVID-SSE2", ff_idct_xvid_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
3ac35bdb
MH
101#endif
102
b250f9c6 103#if ARCH_ARM
e0a2e60c
DB
104 { "SIMPLE-ARM", ff_simple_idct_arm, FF_IDCT_PERM_NONE },
105 { "INT-ARM", ff_j_rev_dct_arm, FF_IDCT_PERM_LIBMPEG2 },
4b357756 106#endif
b250f9c6 107#if HAVE_ARMV5TE
e0a2e60c 108 { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ARMV5TE },
479044ce 109#endif
b250f9c6 110#if HAVE_ARMV6
e0a2e60c 111 { "SIMPLE-ARMV6", ff_simple_idct_armv6, FF_IDCT_PERM_LIBMPEG2, AV_CPU_FLAG_ARMV6 },
479044ce 112#endif
1e9265cd 113#if HAVE_NEON && ARCH_ARM
e0a2e60c 114 { "SIMPLE-NEON", ff_simple_idct_neon, FF_IDCT_PERM_PARTTRANS, AV_CPU_FLAG_NEON },
479044ce 115#endif
479044ce 116
36fa9ef3 117 { 0 }
3ac35bdb
MH
118};
119
/* Fixed-point shift used when rescaling the output of "IJG-AAN-INT". */
#define AANSCALE_BITS 12

/* Number of generated blocks checked in each accuracy test. */
#define NB_ITS 20000
/* Number of transforms executed between two clock reads in a speed test. */
#define NB_ITS_SPEED 50000
124
36fa9ef3
MR
/*
 * Destination index for each source coefficient when the input must be
 * reordered for FF_IDCT_PERM_SIMPLE. The table is only ever read, so it
 * is const like the SSE2 row table below.
 */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column reordering within each row of 8 used for FF_IDCT_PERM_SSE2. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
ad246860 137
88bd7fdc
DB
138DECLARE_ALIGNED(16, static int16_t, block)[64];
139DECLARE_ALIGNED(8, static int16_t, block1)[64];
9e1586fc 140
88bd7fdc 141static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
ae2e8971
MR
142{
143 int i, j;
144
145 memset(block, 0, 64 * sizeof(*block));
146
147 switch (test) {
148 case 0:
149 for (i = 0; i < 64; i++)
150 block[i] = (av_lfg_get(prng) % 512) - 256;
151 if (is_idct) {
152 ff_ref_fdct(block);
153 for (i = 0; i < 64; i++)
154 block[i] >>= 3;
155 }
156 break;
157 case 1:
158 j = av_lfg_get(prng) % 10 + 1;
159 for (i = 0; i < j; i++)
160 block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
161 break;
162 case 2:
163 block[ 0] = av_lfg_get(prng) % 4096 - 2048;
164 block[63] = (block[0] & 1) ^ 1;
165 break;
166 }
167}
168
e0a2e60c
DB
169static void permute(int16_t dst[64], const int16_t src[64],
170 enum idct_permutation_type perm_type)
ae2e8971
MR
171{
172 int i;
173
e0a2e60c
DB
174 switch (perm_type) {
175 case FF_IDCT_PERM_LIBMPEG2:
ae2e8971 176 for (i = 0; i < 64; i++)
913fa85a 177 dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
e0a2e60c
DB
178 break;
179 case FF_IDCT_PERM_SIMPLE:
ae2e8971
MR
180 for (i = 0; i < 64; i++)
181 dst[idct_simple_mmx_perm[i]] = src[i];
e0a2e60c
DB
182 break;
183 case FF_IDCT_PERM_SSE2:
ae2e8971
MR
184 for (i = 0; i < 64; i++)
185 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
e0a2e60c
DB
186 break;
187 case FF_IDCT_PERM_PARTTRANS:
ae2e8971
MR
188 for (i = 0; i < 64; i++)
189 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
e0a2e60c
DB
190 break;
191 default:
ae2e8971
MR
192 for (i = 0; i < 64; i++)
193 dst[i] = src[i];
e0a2e60c 194 break;
ae2e8971
MR
195 }
196}
197
dbf396d4 198static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
de6d9b64 199{
88bd7fdc 200 void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
de6d9b64 201 int it, i, scale;
de6d9b64 202 int err_inf, v;
dbf396d4 203 int64_t err2, ti, ti1, it1, err_sum = 0;
36fa9ef3
MR
204 int64_t sysErr[64], sysErrMax = 0;
205 int maxout = 0;
206 int blockSumErrMax = 0, blockSumErr;
64bde197 207 AVLFG prng;
dbf396d4
MR
208 double omse, ome;
209 int spec_err;
de6d9b64 210
64bde197 211 av_lfg_init(&prng, 1);
de6d9b64
FB
212
213 err_inf = 0;
214 err2 = 0;
36fa9ef3
MR
215 for (i = 0; i < 64; i++)
216 sysErr[i] = 0;
217 for (it = 0; it < NB_ITS; it++) {
ae2e8971 218 init_block(block1, test, is_idct, &prng);
e0a2e60c 219 permute(block, block1, dct->perm_type);
9e1586fc 220
4f905a65 221 dct->func(block);
db7d8fb4 222 emms_c();
9e1586fc 223
e0a2e60c 224 if (!strcmp(dct->name, "IJG-AAN-INT")) {
36fa9ef3
MR
225 for (i = 0; i < 64; i++) {
226 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
227 block[i] = (block[i] * scale) >> AANSCALE_BITS;
86748dbc
MN
228 }
229 }
230
74965f26 231 ref(block1);
de6d9b64 232
36fa9ef3
MR
233 blockSumErr = 0;
234 for (i = 0; i < 64; i++) {
dbf396d4
MR
235 int err = block[i] - block1[i];
236 err_sum += err;
237 v = abs(err);
de6d9b64
FB
238 if (v > err_inf)
239 err_inf = v;
240 err2 += v * v;
bb270c08
DB
241 sysErr[i] += block[i] - block1[i];
242 blockSumErr += v;
36fa9ef3
MR
243 if (abs(block[i]) > maxout)
244 maxout = abs(block[i]);
de6d9b64 245 }
36fa9ef3
MR
246 if (blockSumErrMax < blockSumErr)
247 blockSumErrMax = blockSumErr;
86748dbc 248 }
36fa9ef3
MR
249 for (i = 0; i < 64; i++)
250 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
115329f1 251
36fa9ef3
MR
252 for (i = 0; i < 64; i++) {
253 if (i % 8 == 0)
254 printf("\n");
255 printf("%7d ", (int) sysErr[i]);
de6d9b64 256 }
86748dbc 257 printf("\n");
115329f1 258
dbf396d4
MR
259 omse = (double) err2 / NB_ITS / 64;
260 ome = (double) err_sum / NB_ITS / 64;
261
262 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
263
264 printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
cf2b4f88 265 is_idct ? "IDCT" : "DCT", dct->name, err_inf,
dbf396d4 266 omse, ome, (double) sysErrMax / NB_ITS,
36fa9ef3 267 maxout, blockSumErrMax);
e6ff0648 268
dbf396d4
MR
269 if (spec_err && !dct->nonspec)
270 return 1;
271
7fd2c138 272 if (!speed)
dbf396d4 273 return 0;
7fd2c138 274
de6d9b64 275 /* speed test */
ae2e8971 276 init_block(block, test, is_idct, &prng);
e0a2e60c 277 permute(block1, block, dct->perm_type);
9e1586fc 278
980f81d9 279 ti = av_gettime();
de6d9b64
FB
280 it1 = 0;
281 do {
36fa9ef3 282 for (it = 0; it < NB_ITS_SPEED; it++) {
ae2e8971 283 memcpy(block, block1, sizeof(block));
4f905a65 284 dct->func(block);
de6d9b64
FB
285 }
286 it1 += NB_ITS_SPEED;
980f81d9 287 ti1 = av_gettime() - ti;
de6d9b64 288 } while (ti1 < 1000000);
db7d8fb4 289 emms_c();
de6d9b64 290
cf2b4f88 291 printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
36fa9ef3 292 (double) it1 * 1000.0 / (double) ti1);
dbf396d4
MR
293
294 return 0;
de6d9b64
FB
295}
296
c6727809
MR
297DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
298DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
a46a3ce4 299
504ffed1 300static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
a46a3ce4
FB
301{
302 static int init;
303 static double c8[8][8];
304 static double c4[4][4];
305 double block1[64], block2[64], block3[64];
306 double s, sum, v;
307 int i, j, k;
308
309 if (!init) {
310 init = 1;
311
36fa9ef3 312 for (i = 0; i < 8; i++) {
a46a3ce4 313 sum = 0;
36fa9ef3
MR
314 for (j = 0; j < 8; j++) {
315 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
a46a3ce4
FB
316 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
317 sum += c8[i][j] * c8[i][j];
318 }
319 }
115329f1 320
36fa9ef3 321 for (i = 0; i < 4; i++) {
a46a3ce4 322 sum = 0;
36fa9ef3
MR
323 for (j = 0; j < 4; j++) {
324 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
a46a3ce4
FB
325 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
326 sum += c4[i][j] * c4[i][j];
327 }
328 }
329 }
330
331 /* butterfly */
652f0197 332 s = 0.5 * sqrt(2.0);
36fa9ef3
MR
333 for (i = 0; i < 4; i++) {
334 for (j = 0; j < 8; j++) {
335 block1[8 * (2 * i) + j] =
336 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
337 block1[8 * (2 * i + 1) + j] =
338 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
a46a3ce4
FB
339 }
340 }
341
342 /* idct8 on lines */
36fa9ef3
MR
343 for (i = 0; i < 8; i++) {
344 for (j = 0; j < 8; j++) {
a46a3ce4 345 sum = 0;
36fa9ef3
MR
346 for (k = 0; k < 8; k++)
347 sum += c8[k][j] * block1[8 * i + k];
348 block2[8 * i + j] = sum;
a46a3ce4
FB
349 }
350 }
351
352 /* idct4 */
36fa9ef3
MR
353 for (i = 0; i < 8; i++) {
354 for (j = 0; j < 4; j++) {
a46a3ce4
FB
355 /* top */
356 sum = 0;
36fa9ef3
MR
357 for (k = 0; k < 4; k++)
358 sum += c4[k][j] * block2[8 * (2 * k) + i];
359 block3[8 * (2 * j) + i] = sum;
a46a3ce4
FB
360
361 /* bottom */
362 sum = 0;
36fa9ef3
MR
363 for (k = 0; k < 4; k++)
364 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
365 block3[8 * (2 * j + 1) + i] = sum;
a46a3ce4
FB
366 }
367 }
368
369 /* clamp and store the result */
36fa9ef3
MR
370 for (i = 0; i < 8; i++) {
371 for (j = 0; j < 8; j++) {
372 v = block3[8 * i + j];
373 if (v < 0) v = 0;
374 else if (v > 255) v = 255;
375 dest[i * linesize + j] = (int) rint(v);
a46a3ce4
FB
376 }
377 }
378}
379
504ffed1 380static void idct248_error(const char *name,
36fa9ef3 381 void (*idct248_put)(uint8_t *dest, int line_size,
7fd2c138
MR
382 int16_t *block),
383 int speed)
a46a3ce4
FB
384{
385 int it, i, it1, ti, ti1, err_max, v;
64bde197 386 AVLFG prng;
294eaa26 387
64bde197 388 av_lfg_init(&prng, 1);
115329f1 389
a46a3ce4
FB
390 /* just one test to see if code is correct (precision is less
391 important here) */
392 err_max = 0;
36fa9ef3 393 for (it = 0; it < NB_ITS; it++) {
652f0197 394 /* XXX: use forward transform to generate values */
36fa9ef3 395 for (i = 0; i < 64; i++)
64bde197 396 block1[i] = av_lfg_get(&prng) % 256 - 128;
652f0197
FB
397 block1[0] += 1024;
398
36fa9ef3
MR
399 for (i = 0; i < 64; i++)
400 block[i] = block1[i];
a46a3ce4 401 idct248_ref(img_dest1, 8, block);
115329f1 402
36fa9ef3
MR
403 for (i = 0; i < 64; i++)
404 block[i] = block1[i];
652f0197 405 idct248_put(img_dest, 8, block);
115329f1 406
36fa9ef3
MR
407 for (i = 0; i < 64; i++) {
408 v = abs((int) img_dest[i] - (int) img_dest1[i]);
652f0197
FB
409 if (v == 255)
410 printf("%d %d\n", img_dest[i], img_dest1[i]);
411 if (v > err_max)
412 err_max = v;
413 }
a46a3ce4 414 }
36fa9ef3 415 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
a46a3ce4 416
7fd2c138
MR
417 if (!speed)
418 return;
419
980f81d9 420 ti = av_gettime();
a46a3ce4
FB
421 it1 = 0;
422 do {
36fa9ef3
MR
423 for (it = 0; it < NB_ITS_SPEED; it++) {
424 for (i = 0; i < 64; i++)
425 block[i] = block1[i];
a46a3ce4
FB
426 idct248_put(img_dest, 8, block);
427 }
428 it1 += NB_ITS_SPEED;
980f81d9 429 ti1 = av_gettime() - ti;
a46a3ce4 430 } while (ti1 < 1000000);
db7d8fb4 431 emms_c();
a46a3ce4 432
36fa9ef3
MR
433 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
434 (double) it1 * 1000.0 / (double) ti1);
a46a3ce4
FB
435}
436
/**
 * Print the command line usage to stdout.
 *
 * The synopsis lists every option accepted by the getopt() string in main().
 */
static void help(void)
{
    printf("dct-test [-i] [-4] [-t] [-h] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-t          speed test\n"
           "-h          print this help\n");
}
447
667fb97a
RB
448#if !HAVE_GETOPT
449#include "compat/getopt.c"
450#endif
451
de6d9b64
FB
452int main(int argc, char **argv)
453{
a46a3ce4 454 int test_idct = 0, test_248_dct = 0;
36fa9ef3
MR
455 int c, i;
456 int test = 1;
7fd2c138 457 int speed = 0;
dbf396d4 458 int err = 0;
36fa9ef3 459
0de74546 460 ff_ref_dct_init();
f67a10cd 461
36fa9ef3 462 for (;;) {
7fd2c138 463 c = getopt(argc, argv, "ih4t");
9e1586fc
FB
464 if (c == -1)
465 break;
36fa9ef3 466 switch (c) {
9e1586fc
FB
467 case 'i':
468 test_idct = 1;
469 break;
a46a3ce4
FB
470 case '4':
471 test_248_dct = 1;
472 break;
7fd2c138
MR
473 case 't':
474 speed = 1;
475 break;
36fa9ef3 476 default:
9e1586fc
FB
477 case 'h':
478 help();
c6bdc908 479 return 0;
9e1586fc
FB
480 }
481 }
115329f1 482
36fa9ef3
MR
483 if (optind < argc)
484 test = atoi(argv[optind]);
115329f1 485
f36b3902 486 printf("Libav DCT/IDCT test\n");
9e1586fc 487
a46a3ce4 488 if (test_248_dct) {
7fd2c138 489 idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
9e1586fc 490 } else {
cb44b21d 491 const int cpu_flags = av_get_cpu_flags();
4b357756 492 const struct algo *algos = test_idct ? idct_tab : fdct_tab;
36fa9ef3 493 for (i = 0; algos[i].name; i++)
746ad4e0 494 if (!(~cpu_flags & algos[i].cpu_flag)) {
dbf396d4 495 err |= dct_error(&algos[i], test, test_idct, speed);
36fa9ef3 496 }
9e1586fc 497 }
dbf396d4 498
5331d2b9
DB
499 if (err)
500 printf("Error: %d.\n", err);
501
502 return !!err;
de6d9b64 503}