Fixing msmpeg4 decoding if fps < 16 (I thought it was an indicator for the ext header...
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
3 * Copyright (c) 2000, 2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19#include <stdlib.h>
20#include <stdio.h>
21#include "avcodec.h"
22#include "dsputil.h"
d962f6fd 23#include "simple_idct.h"
de6d9b64 24
4af7bcc1 25void (*ff_idct)(DCTELEM *block);
de6d9b64
FB
26void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
27void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
28void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
29
30op_pixels_abs_func pix_abs16x16;
31op_pixels_abs_func pix_abs16x16_x2;
32op_pixels_abs_func pix_abs16x16_y2;
33op_pixels_abs_func pix_abs16x16_xy2;
34
0cfa9713 35UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
36UINT32 squareTbl[512];
37
e0eac44e
FB
38extern UINT16 default_intra_matrix[64];
39extern UINT16 default_non_intra_matrix[64];
40
41UINT8 zigzag_direct[64] = {
42 0, 1, 8, 16, 9, 2, 3, 10,
43 17, 24, 32, 25, 18, 11, 4, 5,
44 12, 19, 26, 33, 40, 48, 41, 34,
45 27, 20, 13, 6, 7, 14, 21, 28,
46 35, 42, 49, 56, 57, 50, 43, 36,
47 29, 22, 15, 23, 30, 37, 44, 51,
48 58, 59, 52, 45, 38, 31, 39, 46,
49 53, 60, 61, 54, 47, 55, 62, 63
50};
51
2f349de2
MN
52/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
53UINT16 __align8 inv_zigzag_direct16[64];
54
55/* not permutated zigzag_direct for MMX quantizer */
56UINT8 zigzag_direct_noperm[64];
57
e0eac44e
FB
58UINT8 ff_alternate_horizontal_scan[64] = {
59 0, 1, 2, 3, 8, 9, 16, 17,
60 10, 11, 4, 5, 6, 7, 15, 14,
61 13, 12, 19, 18, 24, 25, 32, 33,
62 26, 27, 20, 21, 22, 23, 28, 29,
63 30, 31, 34, 35, 40, 41, 48, 49,
64 42, 43, 36, 37, 38, 39, 44, 45,
65 46, 47, 50, 51, 56, 57, 58, 59,
66 52, 53, 54, 55, 60, 61, 62, 63,
67};
68
69UINT8 ff_alternate_vertical_scan[64] = {
70 0, 8, 16, 24, 1, 9, 2, 10,
71 17, 25, 32, 40, 48, 56, 57, 49,
72 41, 33, 26, 18, 3, 11, 4, 12,
73 19, 27, 34, 42, 50, 58, 35, 43,
74 51, 59, 20, 28, 5, 13, 6, 14,
75 21, 29, 36, 44, 52, 60, 37, 45,
76 53, 61, 22, 30, 7, 15, 23, 31,
77 38, 46, 54, 62, 39, 47, 55, 63,
78};
79
0a8d8945 80/* Input permutation for the simple_idct_mmx */
5a240838 81static UINT8 simple_mmx_permutation[64]={
0a8d8945
MN
82 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
83 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
84 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
85 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
86 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
87 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
88 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
89 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
5a240838
MN
90};
91
2f349de2
MN
92/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
93UINT32 inverse[256]={
94 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
95 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
96 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
97 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
98 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
99 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
100 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
101 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
102 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
103 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
104 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
105 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
106 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
107 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
108 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
109 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
110 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
111 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
112 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
113 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
114 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
115 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
116 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
117 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
118 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
119 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
120 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
121 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
122 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
123 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
124 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
125 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
126};
127
badaf88e
MN
128/* used to skip zeros at the end */
129UINT8 zigzag_end[64];
130
5a240838
MN
131UINT8 permutation[64];
132//UINT8 invPermutation[64];
133
badaf88e
MN
134static void build_zigzag_end()
135{
136 int lastIndex;
137 int lastIndexAfterPerm=0;
138 for(lastIndex=0; lastIndex<64; lastIndex++)
139 {
140 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
141 lastIndexAfterPerm= zigzag_direct[lastIndex];
142 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
143 }
144}
145
de6d9b64
FB
146void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
147{
148 DCTELEM *p;
149 const UINT8 *pix;
150 int i;
151
152 /* read the pixels */
153 p = block;
154 pix = pixels;
155 for(i=0;i<8;i++) {
156 p[0] = pix[0];
157 p[1] = pix[1];
158 p[2] = pix[2];
159 p[3] = pix[3];
160 p[4] = pix[4];
161 p[5] = pix[5];
162 p[6] = pix[6];
163 p[7] = pix[7];
164 pix += line_size;
165 p += 8;
166 }
167}
168
169void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
170{
171 const DCTELEM *p;
172 UINT8 *pix;
173 int i;
174 UINT8 *cm = cropTbl + MAX_NEG_CROP;
175
176 /* read the pixels */
177 p = block;
178 pix = pixels;
179 for(i=0;i<8;i++) {
180 pix[0] = cm[p[0]];
181 pix[1] = cm[p[1]];
182 pix[2] = cm[p[2]];
183 pix[3] = cm[p[3]];
184 pix[4] = cm[p[4]];
185 pix[5] = cm[p[5]];
186 pix[6] = cm[p[6]];
187 pix[7] = cm[p[7]];
188 pix += line_size;
189 p += 8;
190 }
191}
192
193void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
194{
195 const DCTELEM *p;
196 UINT8 *pix;
197 int i;
198 UINT8 *cm = cropTbl + MAX_NEG_CROP;
199
200 /* read the pixels */
201 p = block;
202 pix = pixels;
203 for(i=0;i<8;i++) {
204 pix[0] = cm[pix[0] + p[0]];
205 pix[1] = cm[pix[1] + p[1]];
206 pix[2] = cm[pix[2] + p[2]];
207 pix[3] = cm[pix[3] + p[3]];
208 pix[4] = cm[pix[4] + p[4]];
209 pix[5] = cm[pix[5] + p[5]];
210 pix[6] = cm[pix[6] + p[6]];
211 pix[7] = cm[pix[7] + p[7]];
212 pix += line_size;
213 p += 8;
214 }
215}
216
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions that
 * process an 8-pixel-wide, h-row area, plus a public table of them:
 *
 *   OPNAME_pixels      source pixel used as is
 *   OPNAME_pixels_x2   avg2 of each pixel and its right neighbour
 *   OPNAME_pixels_y2   avg2 of each pixel and the one below it
 *   OPNAME_pixels_xy2  avg4 of each pixel, its right, lower and lower-right
 *                      neighbours
 *   OPNAME_pixels_tab  { plain, x2, y2, xy2 } in that order
 *
 * BTYPE is the destination element type, OP(dst, value) combines a value
 * into a destination element, and INCR is the destination row step.
 * avg2/avg4 are taken from whichever definitions are active at the point
 * of instantiation.
 *
 * The x2/xy2 variants read 9 pixels per row; the y2/xy2 variants read one
 * row beyond the h rows processed.  The row loops are do/while, so h must
 * be at least 1.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                                        \
                                                                                              \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h)        \
{                                                                                             \
    BTYPE *p = block;                                                                         \
    const UINT8 *pix = pixels;                                                                \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(p[i], pix[i]);                                                                 \
        pix += line_size;                                                                     \
        p += INCR;                                                                            \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
{                                                                                             \
    BTYPE *p = block;                                                                         \
    const UINT8 *pix = pixels;                                                                \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(p[i], avg2(pix[i], pix[i + 1]));                                               \
        pix += line_size;                                                                     \
        p += INCR;                                                                            \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
{                                                                                             \
    BTYPE *p = block;                                                                         \
    const UINT8 *pix = pixels;                                                                \
    const UINT8 *pix1 = pixels + line_size;                                                   \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(p[i], avg2(pix[i], pix1[i]));                                                  \
        pix += line_size;                                                                     \
        pix1 += line_size;                                                                    \
        p += INCR;                                                                            \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
{                                                                                             \
    BTYPE *p = block;                                                                         \
    const UINT8 *pix = pixels;                                                                \
    const UINT8 *pix1 = pixels + line_size;                                                   \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(p[i], avg4(pix[i], pix[i + 1], pix1[i], pix1[i + 1]));                         \
        pix += line_size;                                                                     \
        pix1 += line_size;                                                                    \
        p += INCR;                                                                            \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                                        \
    OPNAME ## _pixels_x2,                                                                     \
    OPNAME ## _pixels_y2,                                                                     \
    OPNAME ## _pixels_xy2,                                                                    \
};
315
316
317/* rounding primitives */
318#define avg2(a,b) ((a+b+1)>>1)
319#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
320
321#define op_put(a, b) a = b
322#define op_avg(a, b) a = avg2(a, b)
323#define op_sub(a, b) a -= b
324
325PIXOP(UINT8, put, op_put, line_size)
326PIXOP(UINT8, avg, op_avg, line_size)
327
328PIXOP(DCTELEM, sub, op_sub, 8)
329
330/* not rounding primitives */
331#undef avg2
332#undef avg4
333#define avg2(a,b) ((a+b)>>1)
334#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
335
336PIXOP(UINT8, put_no_rnd, op_put, line_size)
337PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
338
339/* motion estimation */
340
341#undef avg2
342#undef avg4
343#define avg2(a,b) ((a+b+1)>>1)
344#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
345
346int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
347{
348 int s, i;
349
350 s = 0;
351 for(i=0;i<h;i++) {
352 s += abs(pix1[0] - pix2[0]);
353 s += abs(pix1[1] - pix2[1]);
354 s += abs(pix1[2] - pix2[2]);
355 s += abs(pix1[3] - pix2[3]);
356 s += abs(pix1[4] - pix2[4]);
357 s += abs(pix1[5] - pix2[5]);
358 s += abs(pix1[6] - pix2[6]);
359 s += abs(pix1[7] - pix2[7]);
360 s += abs(pix1[8] - pix2[8]);
361 s += abs(pix1[9] - pix2[9]);
362 s += abs(pix1[10] - pix2[10]);
363 s += abs(pix1[11] - pix2[11]);
364 s += abs(pix1[12] - pix2[12]);
365 s += abs(pix1[13] - pix2[13]);
366 s += abs(pix1[14] - pix2[14]);
367 s += abs(pix1[15] - pix2[15]);
368 pix1 += line_size;
369 pix2 += line_size;
370 }
371 return s;
372}
373
374int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
375{
376 int s, i;
377
378 s = 0;
379 for(i=0;i<h;i++) {
380 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
381 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
382 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
383 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
384 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
385 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
386 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
387 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
388 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
389 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
390 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
391 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
392 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
393 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
394 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
395 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
396 pix1 += line_size;
397 pix2 += line_size;
398 }
399 return s;
400}
401
402int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
403{
404 int s, i;
405 UINT8 *pix3 = pix2 + line_size;
406
407 s = 0;
408 for(i=0;i<h;i++) {
409 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
410 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
411 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
412 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
413 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
414 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
415 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
416 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
417 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
418 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
419 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
420 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
421 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
422 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
423 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
424 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
425 pix1 += line_size;
426 pix2 += line_size;
427 pix3 += line_size;
428 }
429 return s;
430}
431
432int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
433{
434 int s, i;
435 UINT8 *pix3 = pix2 + line_size;
436
437 s = 0;
438 for(i=0;i<h;i++) {
439 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
440 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
441 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
442 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
443 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
444 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
445 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
446 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
447 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
448 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
449 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
450 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
451 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
452 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
453 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
454 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
455 pix1 += line_size;
456 pix2 += line_size;
457 pix3 += line_size;
458 }
459 return s;
460}
461
e0eac44e
FB
462/* permute block according so that it corresponds to the MMX idct
463 order */
d962f6fd 464#ifdef SIMPLE_IDCT
5a240838 465 /* general permutation, but perhaps slightly slower */
d962f6fd
A
466void block_permute(INT16 *block)
467{
468 int i;
469 INT16 temp[64];
470
d962f6fd
A
471 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
472
473 for(i=0; i<64; i++) block[i] = temp[i];
d962f6fd 474}
d962f6fd
A
475#else
476
e0eac44e 477void block_permute(INT16 *block)
de6d9b64 478{
e0eac44e 479 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
de6d9b64
FB
480 int i;
481
e0eac44e
FB
482 for(i=0;i<8;i++) {
483 tmp1 = block[1];
484 tmp2 = block[2];
485 tmp3 = block[3];
486 tmp4 = block[4];
487 tmp5 = block[5];
488 tmp6 = block[6];
489 block[1] = tmp2;
490 block[2] = tmp4;
491 block[3] = tmp6;
492 block[4] = tmp1;
493 block[5] = tmp3;
494 block[6] = tmp5;
495 block += 8;
496 }
497}
d962f6fd 498#endif
e0eac44e
FB
499
500void dsputil_init(void)
501{
502 int i, j;
c34270f5 503 int use_permuted_idct;
e0eac44e 504
de6d9b64
FB
505 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
506 for(i=0;i<MAX_NEG_CROP;i++) {
507 cropTbl[i] = 0;
508 cropTbl[i + MAX_NEG_CROP + 256] = 255;
509 }
510
511 for(i=0;i<512;i++) {
512 squareTbl[i] = (i - 256) * (i - 256);
513 }
514
d962f6fd
A
515#ifdef SIMPLE_IDCT
516 ff_idct = simple_idct;
517#else
4af7bcc1 518 ff_idct = j_rev_dct;
d962f6fd 519#endif
de6d9b64
FB
520 get_pixels = get_pixels_c;
521 put_pixels_clamped = put_pixels_clamped_c;
522 add_pixels_clamped = add_pixels_clamped_c;
523
524 pix_abs16x16 = pix_abs16x16_c;
525 pix_abs16x16_x2 = pix_abs16x16_x2_c;
526 pix_abs16x16_y2 = pix_abs16x16_y2_c;
527 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
528 av_fdct = jpeg_fdct_ifast;
529
c34270f5 530 use_permuted_idct = 1;
e0eac44e 531
980fc7b8 532#ifdef HAVE_MMX
de6d9b64
FB
533 dsputil_init_mmx();
534#endif
3d03c0a2
FB
535#ifdef ARCH_ARMV4L
536 dsputil_init_armv4l();
537#endif
c34270f5
FB
538#ifdef HAVE_MLIB
539 dsputil_init_mlib();
540 use_permuted_idct = 0;
541#endif
1e98dffb
NK
542#ifdef ARCH_ALPHA
543 dsputil_init_alpha();
544 use_permuted_idct = 0;
545#endif
c34270f5 546
d962f6fd
A
547#ifdef SIMPLE_IDCT
548 if(ff_idct == simple_idct) use_permuted_idct=0;
549#endif
550
5a240838
MN
551 if(use_permuted_idct)
552#ifdef SIMPLE_IDCT
553 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
554#else
555 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
556#endif
557 else
558 for(i=0; i<64; i++) permutation[i]=i;
559
2f349de2
MN
560 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
561 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
562
c34270f5
FB
563 if (use_permuted_idct) {
564 /* permute for IDCT */
565 for(i=0;i<64;i++) {
566 j = zigzag_direct[i];
567 zigzag_direct[i] = block_permute_op(j);
568 j = ff_alternate_horizontal_scan[i];
569 ff_alternate_horizontal_scan[i] = block_permute_op(j);
570 j = ff_alternate_vertical_scan[i];
571 ff_alternate_vertical_scan[i] = block_permute_op(j);
572 }
573 block_permute(default_intra_matrix);
574 block_permute(default_non_intra_matrix);
575 }
badaf88e
MN
576
577 build_zigzag_end();
de6d9b64 578}