* BeOS patch by François Revol <revol@free.fr>
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
3 * Copyright (c) 2000, 2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19#include <stdlib.h>
20#include <stdio.h>
43f1708f 21#include <math.h>
de6d9b64
FB
22#include "avcodec.h"
23#include "dsputil.h"
d962f6fd 24#include "simple_idct.h"
de6d9b64 25
4af7bcc1 26void (*ff_idct)(DCTELEM *block);
de6d9b64
FB
27void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
28void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
29void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
30
31op_pixels_abs_func pix_abs16x16;
32op_pixels_abs_func pix_abs16x16_x2;
33op_pixels_abs_func pix_abs16x16_y2;
34op_pixels_abs_func pix_abs16x16_xy2;
35
0cfa9713 36UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
37UINT32 squareTbl[512];
38
e0eac44e
FB
39extern UINT16 default_intra_matrix[64];
40extern UINT16 default_non_intra_matrix[64];
41
42UINT8 zigzag_direct[64] = {
43 0, 1, 8, 16, 9, 2, 3, 10,
44 17, 24, 32, 25, 18, 11, 4, 5,
45 12, 19, 26, 33, 40, 48, 41, 34,
46 27, 20, 13, 6, 7, 14, 21, 28,
47 35, 42, 49, 56, 57, 50, 43, 36,
48 29, 22, 15, 23, 30, 37, 44, 51,
49 58, 59, 52, 45, 38, 31, 39, 46,
50 53, 60, 61, 54, 47, 55, 62, 63
51};
52
2f349de2
MN
53/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
54UINT16 __align8 inv_zigzag_direct16[64];
55
56/* not permutated zigzag_direct for MMX quantizer */
57UINT8 zigzag_direct_noperm[64];
58
e0eac44e
FB
59UINT8 ff_alternate_horizontal_scan[64] = {
60 0, 1, 2, 3, 8, 9, 16, 17,
61 10, 11, 4, 5, 6, 7, 15, 14,
62 13, 12, 19, 18, 24, 25, 32, 33,
63 26, 27, 20, 21, 22, 23, 28, 29,
64 30, 31, 34, 35, 40, 41, 48, 49,
65 42, 43, 36, 37, 38, 39, 44, 45,
66 46, 47, 50, 51, 56, 57, 58, 59,
67 52, 53, 54, 55, 60, 61, 62, 63,
68};
69
70UINT8 ff_alternate_vertical_scan[64] = {
71 0, 8, 16, 24, 1, 9, 2, 10,
72 17, 25, 32, 40, 48, 56, 57, 49,
73 41, 33, 26, 18, 3, 11, 4, 12,
74 19, 27, 34, 42, 50, 58, 35, 43,
75 51, 59, 20, 28, 5, 13, 6, 14,
76 21, 29, 36, 44, 52, 60, 37, 45,
77 53, 61, 22, 30, 7, 15, 23, 31,
78 38, 46, 54, 62, 39, 47, 55, 63,
79};
80
0a8d8945 81/* Input permutation for the simple_idct_mmx */
5a240838 82static UINT8 simple_mmx_permutation[64]={
0a8d8945
MN
83 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
84 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
85 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
86 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
87 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
88 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
89 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
90 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
5a240838
MN
91};
92
2f349de2
MN
93/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
94UINT32 inverse[256]={
95 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
96 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
97 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
98 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
99 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
100 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
101 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
102 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
103 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
104 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
105 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
106 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
107 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
108 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
109 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
110 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
111 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
112 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
113 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
114 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
115 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
116 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
117 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
118 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
119 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
120 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
121 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
122 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
123 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
124 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
125 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
126 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
127};
128
badaf88e
MN
129/* used to skip zeros at the end */
130UINT8 zigzag_end[64];
131
5a240838
MN
132UINT8 permutation[64];
133//UINT8 invPermutation[64];
134
badaf88e
MN
135static void build_zigzag_end()
136{
137 int lastIndex;
138 int lastIndexAfterPerm=0;
139 for(lastIndex=0; lastIndex<64; lastIndex++)
140 {
141 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
142 lastIndexAfterPerm= zigzag_direct[lastIndex];
143 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
144 }
145}
146
de6d9b64
FB
147void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
148{
149 DCTELEM *p;
150 const UINT8 *pix;
151 int i;
152
153 /* read the pixels */
154 p = block;
155 pix = pixels;
156 for(i=0;i<8;i++) {
157 p[0] = pix[0];
158 p[1] = pix[1];
159 p[2] = pix[2];
160 p[3] = pix[3];
161 p[4] = pix[4];
162 p[5] = pix[5];
163 p[6] = pix[6];
164 p[7] = pix[7];
165 pix += line_size;
166 p += 8;
167 }
168}
169
170void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
171{
172 const DCTELEM *p;
173 UINT8 *pix;
174 int i;
175 UINT8 *cm = cropTbl + MAX_NEG_CROP;
176
177 /* read the pixels */
178 p = block;
179 pix = pixels;
180 for(i=0;i<8;i++) {
181 pix[0] = cm[p[0]];
182 pix[1] = cm[p[1]];
183 pix[2] = cm[p[2]];
184 pix[3] = cm[p[3]];
185 pix[4] = cm[p[4]];
186 pix[5] = cm[p[5]];
187 pix[6] = cm[p[6]];
188 pix[7] = cm[p[7]];
189 pix += line_size;
190 p += 8;
191 }
192}
193
194void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
195{
196 const DCTELEM *p;
197 UINT8 *pix;
198 int i;
199 UINT8 *cm = cropTbl + MAX_NEG_CROP;
200
201 /* read the pixels */
202 p = block;
203 pix = pixels;
204 for(i=0;i<8;i++) {
205 pix[0] = cm[pix[0] + p[0]];
206 pix[1] = cm[pix[1] + p[1]];
207 pix[2] = cm[pix[2] + p[2]];
208 pix[3] = cm[pix[3] + p[3]];
209 pix[4] = cm[pix[4] + p[4]];
210 pix[5] = cm[pix[5] + p[5]];
211 pix[6] = cm[pix[6] + p[6]];
212 pix[7] = cm[pix[7] + p[7]];
213 pix += line_size;
214 p += 8;
215 }
216}
217
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) expands to four static 8-pixel-wide row
 * routines plus a public table of them:
 *
 *   OPNAME_pixels      source pixel used as is
 *   OPNAME_pixels_x2   avg2() of a pixel and its right neighbour
 *   OPNAME_pixels_y2   avg2() of a pixel and the one a line below
 *   OPNAME_pixels_xy2  avg4() of the 2x2 neighbourhood
 *   OPNAME_pixels_tab  { plain, x2, y2, xy2 }
 *
 * BTYPE is the destination element type, OP(dst, value) combines a value
 * into the destination and INCR is the destination row stride.  INCR is
 * pasted into the generated bodies, so it may name the line_size parameter.
 * avg2()/avg4() are taken from whatever is defined where PIXOP is expanded.
 *
 * The row loops are do/while, so h must be at least 1.  The x2 and xy2
 * variants read 9 source pixels per row; y2 and xy2 read h + 1 source rows.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                        \
                                                                              \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels,             \
                              int line_size, int h)                           \
{                                                                             \
    BTYPE *dst = block;                                                       \
    const UINT8 *src = pixels;                                                \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++) {                                             \
            OP(dst[x], src[x]);                                               \
        }                                                                     \
        src += line_size;                                                     \
        dst += INCR;                                                          \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels,          \
                                 int line_size, int h)                        \
{                                                                             \
    BTYPE *dst = block;                                                       \
    const UINT8 *src = pixels;                                                \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++) {                                             \
            OP(dst[x], avg2(src[x], src[x + 1]));                             \
        }                                                                     \
        src += line_size;                                                     \
        dst += INCR;                                                          \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels,          \
                                 int line_size, int h)                        \
{                                                                             \
    BTYPE *dst = block;                                                       \
    const UINT8 *src = pixels;                                                \
    const UINT8 *below = pixels + line_size;                                  \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++) {                                             \
            OP(dst[x], avg2(src[x], below[x]));                               \
        }                                                                     \
        src += line_size;                                                     \
        below += line_size;                                                   \
        dst += INCR;                                                          \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels,         \
                                  int line_size, int h)                       \
{                                                                             \
    BTYPE *dst = block;                                                       \
    const UINT8 *src = pixels;                                                \
    const UINT8 *below = pixels + line_size;                                  \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++) {                                             \
            OP(dst[x], avg4(src[x], src[x + 1], below[x], below[x + 1]));     \
        }                                                                     \
        src += line_size;                                                     \
        below += line_size;                                                   \
        dst += INCR;                                                          \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels,          \
                                 int line_size, int h) = {                    \
    OPNAME ## _pixels,                                                        \
    OPNAME ## _pixels_x2,                                                     \
    OPNAME ## _pixels_y2,                                                     \
    OPNAME ## _pixels_xy2,                                                    \
};
316
317
318/* rounding primitives */
319#define avg2(a,b) ((a+b+1)>>1)
320#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
321
322#define op_put(a, b) a = b
323#define op_avg(a, b) a = avg2(a, b)
324#define op_sub(a, b) a -= b
325
326PIXOP(UINT8, put, op_put, line_size)
327PIXOP(UINT8, avg, op_avg, line_size)
328
329PIXOP(DCTELEM, sub, op_sub, 8)
330
331/* not rounding primitives */
332#undef avg2
333#undef avg4
334#define avg2(a,b) ((a+b)>>1)
335#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
336
337PIXOP(UINT8, put_no_rnd, op_put, line_size)
338PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
339
340/* motion estimation */
341
342#undef avg2
343#undef avg4
344#define avg2(a,b) ((a+b+1)>>1)
345#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
346
347int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
348{
349 int s, i;
350
351 s = 0;
352 for(i=0;i<h;i++) {
353 s += abs(pix1[0] - pix2[0]);
354 s += abs(pix1[1] - pix2[1]);
355 s += abs(pix1[2] - pix2[2]);
356 s += abs(pix1[3] - pix2[3]);
357 s += abs(pix1[4] - pix2[4]);
358 s += abs(pix1[5] - pix2[5]);
359 s += abs(pix1[6] - pix2[6]);
360 s += abs(pix1[7] - pix2[7]);
361 s += abs(pix1[8] - pix2[8]);
362 s += abs(pix1[9] - pix2[9]);
363 s += abs(pix1[10] - pix2[10]);
364 s += abs(pix1[11] - pix2[11]);
365 s += abs(pix1[12] - pix2[12]);
366 s += abs(pix1[13] - pix2[13]);
367 s += abs(pix1[14] - pix2[14]);
368 s += abs(pix1[15] - pix2[15]);
369 pix1 += line_size;
370 pix2 += line_size;
371 }
372 return s;
373}
374
375int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
376{
377 int s, i;
378
379 s = 0;
380 for(i=0;i<h;i++) {
381 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
382 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
383 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
384 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
385 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
386 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
387 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
388 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
389 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
390 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
391 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
392 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
393 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
394 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
395 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
396 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
397 pix1 += line_size;
398 pix2 += line_size;
399 }
400 return s;
401}
402
403int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
404{
405 int s, i;
406 UINT8 *pix3 = pix2 + line_size;
407
408 s = 0;
409 for(i=0;i<h;i++) {
410 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
411 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
412 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
413 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
414 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
415 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
416 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
417 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
418 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
419 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
420 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
421 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
422 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
423 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
424 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
425 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
426 pix1 += line_size;
427 pix2 += line_size;
428 pix3 += line_size;
429 }
430 return s;
431}
432
433int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
434{
435 int s, i;
436 UINT8 *pix3 = pix2 + line_size;
437
438 s = 0;
439 for(i=0;i<h;i++) {
440 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
441 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
442 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
443 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
444 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
445 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
446 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
447 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
448 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
449 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
450 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
451 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
452 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
453 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
454 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
455 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
456 pix1 += line_size;
457 pix2 += line_size;
458 pix3 += line_size;
459 }
460 return s;
461}
462
e0eac44e
FB
463/* permute block according so that it corresponds to the MMX idct
464 order */
d962f6fd 465#ifdef SIMPLE_IDCT
5a240838 466 /* general permutation, but perhaps slightly slower */
d962f6fd
A
467void block_permute(INT16 *block)
468{
469 int i;
470 INT16 temp[64];
471
d962f6fd
A
472 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
473
474 for(i=0; i<64; i++) block[i] = temp[i];
d962f6fd 475}
d962f6fd
A
476#else
477
e0eac44e 478void block_permute(INT16 *block)
de6d9b64 479{
e0eac44e 480 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
de6d9b64
FB
481 int i;
482
e0eac44e
FB
483 for(i=0;i<8;i++) {
484 tmp1 = block[1];
485 tmp2 = block[2];
486 tmp3 = block[3];
487 tmp4 = block[4];
488 tmp5 = block[5];
489 tmp6 = block[6];
490 block[1] = tmp2;
491 block[2] = tmp4;
492 block[3] = tmp6;
493 block[4] = tmp1;
494 block[5] = tmp3;
495 block[6] = tmp5;
496 block += 8;
497 }
498}
d962f6fd 499#endif
e0eac44e
FB
500
501void dsputil_init(void)
502{
503 int i, j;
c34270f5 504 int use_permuted_idct;
e0eac44e 505
de6d9b64
FB
506 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
507 for(i=0;i<MAX_NEG_CROP;i++) {
508 cropTbl[i] = 0;
509 cropTbl[i + MAX_NEG_CROP + 256] = 255;
510 }
511
512 for(i=0;i<512;i++) {
513 squareTbl[i] = (i - 256) * (i - 256);
514 }
515
d962f6fd
A
516#ifdef SIMPLE_IDCT
517 ff_idct = simple_idct;
518#else
4af7bcc1 519 ff_idct = j_rev_dct;
d962f6fd 520#endif
de6d9b64
FB
521 get_pixels = get_pixels_c;
522 put_pixels_clamped = put_pixels_clamped_c;
523 add_pixels_clamped = add_pixels_clamped_c;
524
525 pix_abs16x16 = pix_abs16x16_c;
526 pix_abs16x16_x2 = pix_abs16x16_x2_c;
527 pix_abs16x16_y2 = pix_abs16x16_y2_c;
528 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
529 av_fdct = jpeg_fdct_ifast;
530
c34270f5 531 use_permuted_idct = 1;
e0eac44e 532
980fc7b8 533#ifdef HAVE_MMX
de6d9b64
FB
534 dsputil_init_mmx();
535#endif
3d03c0a2
FB
536#ifdef ARCH_ARMV4L
537 dsputil_init_armv4l();
538#endif
c34270f5
FB
539#ifdef HAVE_MLIB
540 dsputil_init_mlib();
541 use_permuted_idct = 0;
542#endif
1e98dffb
NK
543#ifdef ARCH_ALPHA
544 dsputil_init_alpha();
545 use_permuted_idct = 0;
546#endif
c34270f5 547
d962f6fd
A
548#ifdef SIMPLE_IDCT
549 if(ff_idct == simple_idct) use_permuted_idct=0;
550#endif
551
5a240838
MN
552 if(use_permuted_idct)
553#ifdef SIMPLE_IDCT
554 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
555#else
556 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
557#endif
558 else
559 for(i=0; i<64; i++) permutation[i]=i;
560
2f349de2
MN
561 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
562 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
563
c34270f5
FB
564 if (use_permuted_idct) {
565 /* permute for IDCT */
566 for(i=0;i<64;i++) {
567 j = zigzag_direct[i];
568 zigzag_direct[i] = block_permute_op(j);
569 j = ff_alternate_horizontal_scan[i];
570 ff_alternate_horizontal_scan[i] = block_permute_op(j);
571 j = ff_alternate_vertical_scan[i];
572 ff_alternate_vertical_scan[i] = block_permute_op(j);
573 }
574 block_permute(default_intra_matrix);
575 block_permute(default_non_intra_matrix);
576 }
badaf88e
MN
577
578 build_zigzag_end();
de6d9b64 579}
43f1708f
J
580
581void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
582 int orig_linesize[3], int coded_linesize,
583 AVCodecContext *avctx)
584{
585 int quad, diff, x, y;
586 UINT8 *orig, *coded;
587 UINT32 *sq = squareTbl + 256;
588
589 quad = 0;
590 diff = 0;
591
592 /* Luminance */
593 orig = orig_image[0];
594 coded = coded_image[0];
595
596 for (y=0;y<avctx->height;y++) {
597 for (x=0;x<avctx->width;x++) {
598 diff = *(orig + x) - *(coded + x);
599 quad += sq[diff];
600 }
601 orig += orig_linesize[0];
602 coded += coded_linesize;
603 }
604
605 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
606
607 if (avctx->psnr_y) {
608 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
609 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
610 } else
611 avctx->psnr_y = 99.99;
612}
613