* solaris does not support -q
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
3 * Copyright (c) 2000, 2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7ff037e9
MN
18 *
19 * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
de6d9b64
FB
20 */
21#include <stdlib.h>
22#include <stdio.h>
43f1708f 23#include <math.h>
de6d9b64
FB
24#include "avcodec.h"
25#include "dsputil.h"
d962f6fd 26#include "simple_idct.h"
de6d9b64 27
4af7bcc1 28void (*ff_idct)(DCTELEM *block);
de6d9b64
FB
29void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
30void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
31void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
44eb4951 32void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
649c00c9 33void (*clear_blocks)(DCTELEM *blocks);
de6d9b64
FB
34
35op_pixels_abs_func pix_abs16x16;
36op_pixels_abs_func pix_abs16x16_x2;
37op_pixels_abs_func pix_abs16x16_y2;
38op_pixels_abs_func pix_abs16x16_xy2;
39
ba6802de
MN
40op_pixels_abs_func pix_abs8x8;
41op_pixels_abs_func pix_abs8x8_x2;
42op_pixels_abs_func pix_abs8x8_y2;
43op_pixels_abs_func pix_abs8x8_xy2;
44
0cfa9713 45UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
46UINT32 squareTbl[512];
47
e0eac44e
FB
48extern UINT16 default_intra_matrix[64];
49extern UINT16 default_non_intra_matrix[64];
3bf43d42
MN
50extern UINT16 ff_mpeg4_default_intra_matrix[64];
51extern UINT16 ff_mpeg4_default_non_intra_matrix[64];
e0eac44e
FB
52
53UINT8 zigzag_direct[64] = {
54 0, 1, 8, 16, 9, 2, 3, 10,
55 17, 24, 32, 25, 18, 11, 4, 5,
56 12, 19, 26, 33, 40, 48, 41, 34,
57 27, 20, 13, 6, 7, 14, 21, 28,
58 35, 42, 49, 56, 57, 50, 43, 36,
59 29, 22, 15, 23, 30, 37, 44, 51,
60 58, 59, 52, 45, 38, 31, 39, 46,
61 53, 60, 61, 54, 47, 55, 62, 63
62};
63
2f349de2
MN
/* non-permuted inverse of zigzag_direct, plus 1, for the MMX quantizer */
65UINT16 __align8 inv_zigzag_direct16[64];
66
/* non-permuted copy of zigzag_direct for the MMX quantizer */
68UINT8 zigzag_direct_noperm[64];
69
e0eac44e
FB
70UINT8 ff_alternate_horizontal_scan[64] = {
71 0, 1, 2, 3, 8, 9, 16, 17,
72 10, 11, 4, 5, 6, 7, 15, 14,
73 13, 12, 19, 18, 24, 25, 32, 33,
74 26, 27, 20, 21, 22, 23, 28, 29,
75 30, 31, 34, 35, 40, 41, 48, 49,
76 42, 43, 36, 37, 38, 39, 44, 45,
77 46, 47, 50, 51, 56, 57, 58, 59,
78 52, 53, 54, 55, 60, 61, 62, 63,
79};
80
81UINT8 ff_alternate_vertical_scan[64] = {
82 0, 8, 16, 24, 1, 9, 2, 10,
83 17, 25, 32, 40, 48, 56, 57, 49,
84 41, 33, 26, 18, 3, 11, 4, 12,
85 19, 27, 34, 42, 50, 58, 35, 43,
86 51, 59, 20, 28, 5, 13, 6, 14,
87 21, 29, 36, 44, 52, 60, 37, 45,
88 53, 61, 22, 30, 7, 15, 23, 31,
89 38, 46, 54, 62, 39, 47, 55, 63,
90};
91
e4986da9
J
92#ifdef SIMPLE_IDCT
93
0a8d8945 94/* Input permutation for the simple_idct_mmx */
5a240838 95static UINT8 simple_mmx_permutation[64]={
0a8d8945
MN
96 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
97 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
98 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
99 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
100 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
101 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
102 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
103 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
5a240838 104};
e4986da9 105#endif
5a240838 106
2f349de2
MN
107/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
108UINT32 inverse[256]={
109 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
110 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
111 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
112 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
113 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
114 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
115 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
116 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
117 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
118 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
119 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
120 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
121 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
122 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
123 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
124 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
125 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
126 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
127 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
128 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
129 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
130 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
131 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
132 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
133 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
134 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
135 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
136 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
137 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
138 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
139 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
140 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
141};
142
badaf88e
MN
143/* used to skip zeros at the end */
144UINT8 zigzag_end[64];
145
5a240838
MN
146UINT8 permutation[64];
147//UINT8 invPermutation[64];
148
badaf88e
MN
149static void build_zigzag_end()
150{
151 int lastIndex;
152 int lastIndexAfterPerm=0;
153 for(lastIndex=0; lastIndex<64; lastIndex++)
154 {
155 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
156 lastIndexAfterPerm= zigzag_direct[lastIndex];
157 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
158 }
159}
160
de6d9b64
FB
161void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
162{
163 DCTELEM *p;
164 const UINT8 *pix;
165 int i;
166
167 /* read the pixels */
168 p = block;
169 pix = pixels;
170 for(i=0;i<8;i++) {
171 p[0] = pix[0];
172 p[1] = pix[1];
173 p[2] = pix[2];
174 p[3] = pix[3];
175 p[4] = pix[4];
176 p[5] = pix[5];
177 p[6] = pix[6];
178 p[7] = pix[7];
179 pix += line_size;
180 p += 8;
181 }
182}
183
184void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
185{
186 const DCTELEM *p;
187 UINT8 *pix;
188 int i;
189 UINT8 *cm = cropTbl + MAX_NEG_CROP;
190
191 /* read the pixels */
192 p = block;
193 pix = pixels;
194 for(i=0;i<8;i++) {
195 pix[0] = cm[p[0]];
196 pix[1] = cm[p[1]];
197 pix[2] = cm[p[2]];
198 pix[3] = cm[p[3]];
199 pix[4] = cm[p[4]];
200 pix[5] = cm[p[5]];
201 pix[6] = cm[p[6]];
202 pix[7] = cm[p[7]];
203 pix += line_size;
204 p += 8;
205 }
206}
207
208void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
209{
210 const DCTELEM *p;
211 UINT8 *pix;
212 int i;
213 UINT8 *cm = cropTbl + MAX_NEG_CROP;
214
215 /* read the pixels */
216 p = block;
217 pix = pixels;
218 for(i=0;i<8;i++) {
219 pix[0] = cm[pix[0] + p[0]];
220 pix[1] = cm[pix[1] + p[1]];
221 pix[2] = cm[pix[2] + p[2]];
222 pix[3] = cm[pix[3] + p[3]];
223 pix[4] = cm[pix[4] + p[4]];
224 pix[5] = cm[pix[5] + p[5]];
225 pix[6] = cm[pix[6] + p[6]];
226 pix[7] = cm[pix[7] + p[7]];
227 pix += line_size;
228 p += 8;
229 }
230}
231
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four 8-pixel-wide block
 * operations plus a table holding them:
 *
 *   OPNAME_pixels      source pixel used directly
 *   OPNAME_pixels_x2   avg2 of each pixel and its right neighbour
 *   OPNAME_pixels_y2   avg2 of each pixel and the pixel one row below
 *   OPNAME_pixels_xy2  avg4 of the 2x2 neighbourhood
 *
 * BTYPE is the element type of the destination, OP(dst, value) combines
 * the computed value into the destination, and INCR is the destination
 * row increment.  avg2/avg4 are whatever those macros expand to at the
 * point where PIXOP is instantiated.
 *
 * The row loop is a do/while on --h, so callers must pass h >= 1.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                                  \
                                                                                         \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h)  \
{                                                                                        \
    BTYPE *out = block;                                                                  \
    const UINT8 *in = pixels;                                                            \
    int i;                                                                               \
                                                                                         \
    do {                                                                                 \
        for (i = 0; i < 8; i++) {                                                        \
            OP(out[i], in[i]);                                                           \
        }                                                                                \
        in  += line_size;                                                                \
        out += INCR;                                                                     \
    } while (--h);                                                                       \
}                                                                                        \
                                                                                         \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                                        \
    BTYPE *out = block;                                                                  \
    const UINT8 *in = pixels;                                                            \
    int i;                                                                               \
                                                                                         \
    do {                                                                                 \
        for (i = 0; i < 8; i++) {                                                        \
            OP(out[i], avg2(in[i], in[i + 1]));                                          \
        }                                                                                \
        in  += line_size;                                                                \
        out += INCR;                                                                     \
    } while (--h);                                                                       \
}                                                                                        \
                                                                                         \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                                        \
    BTYPE *out = block;                                                                  \
    const UINT8 *in = pixels;                                                            \
    const UINT8 *below = pixels + line_size;                                             \
    int i;                                                                               \
                                                                                         \
    do {                                                                                 \
        for (i = 0; i < 8; i++) {                                                        \
            OP(out[i], avg2(in[i], below[i]));                                           \
        }                                                                                \
        in    += line_size;                                                              \
        below += line_size;                                                              \
        out   += INCR;                                                                   \
    } while (--h);                                                                       \
}                                                                                        \
                                                                                         \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                                        \
    BTYPE *out = block;                                                                  \
    const UINT8 *in = pixels;                                                            \
    const UINT8 *below = pixels + line_size;                                             \
    int i;                                                                               \
                                                                                         \
    do {                                                                                 \
        for (i = 0; i < 8; i++) {                                                        \
            OP(out[i], avg4(in[i], in[i + 1], below[i], below[i + 1]));                  \
        }                                                                                \
        in    += line_size;                                                              \
        below += line_size;                                                              \
        out   += INCR;                                                                   \
    } while (--h);                                                                       \
}                                                                                        \
                                                                                         \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                                   \
    OPNAME ## _pixels_x2,                                                                \
    OPNAME ## _pixels_y2,                                                                \
    OPNAME ## _pixels_xy2,                                                               \
};
330
331
332/* rounding primitives */
333#define avg2(a,b) ((a+b+1)>>1)
334#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
335
336#define op_put(a, b) a = b
337#define op_avg(a, b) a = avg2(a, b)
338#define op_sub(a, b) a -= b
339
340PIXOP(UINT8, put, op_put, line_size)
341PIXOP(UINT8, avg, op_avg, line_size)
342
343PIXOP(DCTELEM, sub, op_sub, 8)
344
345/* not rounding primitives */
346#undef avg2
347#undef avg4
348#define avg2(a,b) ((a+b)>>1)
349#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
350
351PIXOP(UINT8, put_no_rnd, op_put, line_size)
352PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
353
354/* motion estimation */
355
356#undef avg2
357#undef avg4
358#define avg2(a,b) ((a+b+1)>>1)
359#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
360
44eb4951
MN
361static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
362{
363 const int A=(16-x16)*(16-y16);
364 const int B=( x16)*(16-y16);
365 const int C=(16-x16)*( y16);
366 const int D=( x16)*( y16);
367 int i;
368 rounder= 128 - rounder;
369
370 for(i=0; i<h; i++)
371 {
372 dst[0]= (A*src[0] + B*src[1] + C*src[srcStride+0] + D*src[srcStride+1] + rounder)>>8;
373 dst[1]= (A*src[1] + B*src[2] + C*src[srcStride+1] + D*src[srcStride+2] + rounder)>>8;
374 dst[2]= (A*src[2] + B*src[3] + C*src[srcStride+2] + D*src[srcStride+3] + rounder)>>8;
375 dst[3]= (A*src[3] + B*src[4] + C*src[srcStride+3] + D*src[srcStride+4] + rounder)>>8;
376 dst[4]= (A*src[4] + B*src[5] + C*src[srcStride+4] + D*src[srcStride+5] + rounder)>>8;
377 dst[5]= (A*src[5] + B*src[6] + C*src[srcStride+5] + D*src[srcStride+6] + rounder)>>8;
378 dst[6]= (A*src[6] + B*src[7] + C*src[srcStride+6] + D*src[srcStride+7] + rounder)>>8;
379 dst[7]= (A*src[7] + B*src[8] + C*src[srcStride+7] + D*src[srcStride+8] + rounder)>>8;
380 dst+= srcStride;
381 src+= srcStride;
382 }
383}
384
385static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
386{
387 UINT8 *cm = cropTbl + MAX_NEG_CROP;
388 int i;
389 for(i=0; i<h; i++)
390 {
ba6802de
MN
391 dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)];
392 dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)];
393 dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)];
394 dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)];
395 dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)];
396 dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)];
397 dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)];
398 dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)];
44eb4951
MN
399 dst+=dstStride;
400 src+=srcStride;
401 }
402}
403
404static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
405{
406 UINT8 *cm = cropTbl + MAX_NEG_CROP;
407 int i;
408 for(i=0; i<w; i++)
409 {
410 const int src0= src[0*srcStride];
411 const int src1= src[1*srcStride];
412 const int src2= src[2*srcStride];
413 const int src3= src[3*srcStride];
414 const int src4= src[4*srcStride];
415 const int src5= src[5*srcStride];
416 const int src6= src[6*srcStride];
417 const int src7= src[7*srcStride];
418 const int src8= src[8*srcStride];
ba6802de
MN
419 dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)];
420 dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)];
421 dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)];
422 dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)];
423 dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)];
424 dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)];
425 dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)];
426 dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)];
44eb4951
MN
427 dst++;
428 src++;
429 }
430}
431
432static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
433{
434 int i;
435 for(i=0; i<8; i++)
436 {
437 dst[0]= src[0];
438 dst[1]= src[1];
439 dst[2]= src[2];
440 dst[3]= src[3];
441 dst[4]= src[4];
442 dst[5]= src[5];
443 dst[6]= src[6];
444 dst[7]= src[7];
445 dst+=dstStride;
446 src+=srcStride;
447 }
448}
449
450static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
451{
452 int i;
453 for(i=0; i<8; i++)
454 {
455 dst[0]= (src1[0] + src2[0] + r)>>1;
456 dst[1]= (src1[1] + src2[1] + r)>>1;
457 dst[2]= (src1[2] + src2[2] + r)>>1;
458 dst[3]= (src1[3] + src2[3] + r)>>1;
459 dst[4]= (src1[4] + src2[4] + r)>>1;
460 dst[5]= (src1[5] + src2[5] + r)>>1;
461 dst[6]= (src1[6] + src2[6] + r)>>1;
462 dst[7]= (src1[7] + src2[7] + r)>>1;
463 dst+=dstStride;
464 src1+=srcStride;
465 src2+=8;
466 }
467}
468
469static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
470{
471 int i;
472 for(i=0; i<8; i++)
473 {
474 dst[0]= (src1[0] + src2[0] + src3[0] + src4[0] + r)>>2;
475 dst[1]= (src1[1] + src2[1] + src3[1] + src4[1] + r)>>2;
476 dst[2]= (src1[2] + src2[2] + src3[2] + src4[2] + r)>>2;
477 dst[3]= (src1[3] + src2[3] + src3[3] + src4[3] + r)>>2;
478 dst[4]= (src1[4] + src2[4] + src3[4] + src4[4] + r)>>2;
479 dst[5]= (src1[5] + src2[5] + src3[5] + src4[5] + r)>>2;
480 dst[6]= (src1[6] + src2[6] + src3[6] + src4[6] + r)>>2;
481 dst[7]= (src1[7] + src2[7] + src3[7] + src4[7] + r)>>2;
482 dst+=dstStride;
483 src1+=srcStride;
484 src2+=8;
7ff037e9 485 src3+=8;
44eb4951
MN
486 src4+=8;
487 }
488}
489
490#define QPEL_MC(r, name) \
491static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
492{\
493 put_block(dst, src, dstStride, srcStride);\
494}\
495\
496static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
497{\
498 UINT8 half[64];\
ba6802de 499 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
500 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
501}\
502\
503static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
504{\
ba6802de 505 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
44eb4951
MN
506}\
507\
508static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
509{\
510 UINT8 half[64];\
ba6802de 511 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
512 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
513}\
514\
515static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
516{\
517 UINT8 half[64];\
ba6802de 518 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
519 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
520}\
521\
522static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
523{\
ba6802de 524 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
44eb4951
MN
525}\
526\
527static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
528{\
529 UINT8 half[64];\
ba6802de 530 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
531 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
532}\
533static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
534{\
535 UINT8 halfH[72];\
7ff037e9 536 UINT8 halfV[64];\
44eb4951 537 UINT8 halfHV[64];\
ba6802de
MN
538 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
539 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
540 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
541 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
542}\
543static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
544{\
545 UINT8 halfH[72];\
7ff037e9 546 UINT8 halfV[64];\
44eb4951 547 UINT8 halfHV[64];\
ba6802de
MN
548 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
549 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
550 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
551 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
552}\
553static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
554{\
555 UINT8 halfH[72];\
7ff037e9 556 UINT8 halfV[64];\
44eb4951 557 UINT8 halfHV[64];\
ba6802de
MN
558 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
559 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
560 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 561 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
562}\
563static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
564{\
565 UINT8 halfH[72];\
7ff037e9 566 UINT8 halfV[64];\
44eb4951 567 UINT8 halfHV[64];\
ba6802de
MN
568 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
569 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
570 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 571 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
572}\
573static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
574{\
575 UINT8 halfH[72];\
576 UINT8 halfHV[64];\
ba6802de
MN
577 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
578 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
579 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
580}\
581static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
582{\
583 UINT8 halfH[72];\
584 UINT8 halfHV[64];\
ba6802de
MN
585 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
586 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
587 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
588}\
589static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
590{\
591 UINT8 halfH[72];\
7ff037e9 592 UINT8 halfV[64];\
44eb4951 593 UINT8 halfHV[64];\
ba6802de
MN
594 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
595 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
596 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 597 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
598}\
599static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
600{\
601 UINT8 halfH[72];\
7ff037e9 602 UINT8 halfV[64];\
44eb4951 603 UINT8 halfHV[64];\
ba6802de
MN
604 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
605 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
606 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 607 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
608}\
609static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
610{\
611 UINT8 halfH[72];\
ba6802de
MN
612 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
613 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
44eb4951
MN
614}\
615qpel_mc_func qpel_mc ## name ## _tab[16]={ \
616 qpel_mc00_c ## name, \
617 qpel_mc10_c ## name, \
618 qpel_mc20_c ## name, \
619 qpel_mc30_c ## name, \
620 qpel_mc01_c ## name, \
621 qpel_mc11_c ## name, \
622 qpel_mc21_c ## name, \
623 qpel_mc31_c ## name, \
624 qpel_mc02_c ## name, \
625 qpel_mc12_c ## name, \
626 qpel_mc22_c ## name, \
627 qpel_mc32_c ## name, \
628 qpel_mc03_c ## name, \
629 qpel_mc13_c ## name, \
630 qpel_mc23_c ## name, \
631 qpel_mc33_c ## name, \
632};
633
634QPEL_MC(0, _rnd)
635QPEL_MC(1, _no_rnd)
636
ba6802de 637int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
638{
639 int s, i;
640
641 s = 0;
ba6802de 642 for(i=0;i<16;i++) {
de6d9b64
FB
643 s += abs(pix1[0] - pix2[0]);
644 s += abs(pix1[1] - pix2[1]);
645 s += abs(pix1[2] - pix2[2]);
646 s += abs(pix1[3] - pix2[3]);
647 s += abs(pix1[4] - pix2[4]);
648 s += abs(pix1[5] - pix2[5]);
649 s += abs(pix1[6] - pix2[6]);
650 s += abs(pix1[7] - pix2[7]);
651 s += abs(pix1[8] - pix2[8]);
652 s += abs(pix1[9] - pix2[9]);
653 s += abs(pix1[10] - pix2[10]);
654 s += abs(pix1[11] - pix2[11]);
655 s += abs(pix1[12] - pix2[12]);
656 s += abs(pix1[13] - pix2[13]);
657 s += abs(pix1[14] - pix2[14]);
658 s += abs(pix1[15] - pix2[15]);
659 pix1 += line_size;
660 pix2 += line_size;
661 }
662 return s;
663}
664
ba6802de 665int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
666{
667 int s, i;
668
669 s = 0;
ba6802de 670 for(i=0;i<16;i++) {
de6d9b64
FB
671 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
672 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
673 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
674 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
675 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
676 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
677 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
678 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
679 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
680 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
681 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
682 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
683 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
684 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
685 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
686 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
687 pix1 += line_size;
688 pix2 += line_size;
689 }
690 return s;
691}
692
ba6802de 693int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
694{
695 int s, i;
696 UINT8 *pix3 = pix2 + line_size;
697
698 s = 0;
ba6802de 699 for(i=0;i<16;i++) {
de6d9b64
FB
700 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
701 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
702 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
703 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
704 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
705 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
706 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
707 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
708 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
709 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
710 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
711 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
712 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
713 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
714 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
715 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
716 pix1 += line_size;
717 pix2 += line_size;
718 pix3 += line_size;
719 }
720 return s;
721}
722
ba6802de 723int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
724{
725 int s, i;
726 UINT8 *pix3 = pix2 + line_size;
727
728 s = 0;
ba6802de 729 for(i=0;i<16;i++) {
de6d9b64
FB
730 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
731 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
732 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
733 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
734 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
735 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
736 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
737 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
738 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
739 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
740 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
741 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
742 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
743 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
744 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
745 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
746 pix1 += line_size;
747 pix2 += line_size;
748 pix3 += line_size;
749 }
750 return s;
751}
752
ba6802de
MN
753int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
754{
755 int s, i;
756
757 s = 0;
758 for(i=0;i<8;i++) {
759 s += abs(pix1[0] - pix2[0]);
760 s += abs(pix1[1] - pix2[1]);
761 s += abs(pix1[2] - pix2[2]);
762 s += abs(pix1[3] - pix2[3]);
763 s += abs(pix1[4] - pix2[4]);
764 s += abs(pix1[5] - pix2[5]);
765 s += abs(pix1[6] - pix2[6]);
766 s += abs(pix1[7] - pix2[7]);
767 pix1 += line_size;
768 pix2 += line_size;
769 }
770 return s;
771}
772
773int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
774{
775 int s, i;
776
777 s = 0;
778 for(i=0;i<8;i++) {
779 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
780 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
781 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
782 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
783 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
784 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
785 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
786 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
787 pix1 += line_size;
788 pix2 += line_size;
789 }
790 return s;
791}
792
793int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
794{
795 int s, i;
796 UINT8 *pix3 = pix2 + line_size;
797
798 s = 0;
799 for(i=0;i<8;i++) {
800 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
801 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
802 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
803 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
804 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
805 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
806 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
807 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
808 pix1 += line_size;
809 pix2 += line_size;
810 pix3 += line_size;
811 }
812 return s;
813}
814
815int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
816{
817 int s, i;
818 UINT8 *pix3 = pix2 + line_size;
819
820 s = 0;
821 for(i=0;i<8;i++) {
822 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
823 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
824 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
825 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
826 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
827 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
828 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
829 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
830 pix1 += line_size;
831 pix2 += line_size;
832 pix3 += line_size;
833 }
834 return s;
835}
836
e0eac44e
FB
837/* permute block according so that it corresponds to the MMX idct
838 order */
d962f6fd 839#ifdef SIMPLE_IDCT
5a240838 840 /* general permutation, but perhaps slightly slower */
d962f6fd
A
841void block_permute(INT16 *block)
842{
843 int i;
844 INT16 temp[64];
845
d962f6fd
A
846 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
847
848 for(i=0; i<64; i++) block[i] = temp[i];
d962f6fd 849}
d962f6fd
A
850#else
851
e0eac44e 852void block_permute(INT16 *block)
de6d9b64 853{
e0eac44e 854 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
de6d9b64
FB
855 int i;
856
e0eac44e
FB
857 for(i=0;i<8;i++) {
858 tmp1 = block[1];
859 tmp2 = block[2];
860 tmp3 = block[3];
861 tmp4 = block[4];
862 tmp5 = block[5];
863 tmp6 = block[6];
864 block[1] = tmp2;
865 block[2] = tmp4;
866 block[3] = tmp6;
867 block[4] = tmp1;
868 block[5] = tmp3;
869 block[6] = tmp5;
870 block += 8;
871 }
872}
d962f6fd 873#endif
e0eac44e 874
649c00c9
MN
875void clear_blocks_c(DCTELEM *blocks)
876{
877 memset(blocks, 0, sizeof(DCTELEM)*6*64);
878}
879
e0eac44e
FB
880void dsputil_init(void)
881{
882 int i, j;
c34270f5 883 int use_permuted_idct;
e0eac44e 884
de6d9b64
FB
885 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
886 for(i=0;i<MAX_NEG_CROP;i++) {
887 cropTbl[i] = 0;
888 cropTbl[i + MAX_NEG_CROP + 256] = 255;
889 }
890
891 for(i=0;i<512;i++) {
892 squareTbl[i] = (i - 256) * (i - 256);
893 }
894
d962f6fd
A
895#ifdef SIMPLE_IDCT
896 ff_idct = simple_idct;
897#else
4af7bcc1 898 ff_idct = j_rev_dct;
d962f6fd 899#endif
de6d9b64
FB
900 get_pixels = get_pixels_c;
901 put_pixels_clamped = put_pixels_clamped_c;
902 add_pixels_clamped = add_pixels_clamped_c;
44eb4951 903 gmc1= gmc1_c;
649c00c9 904 clear_blocks= clear_blocks_c;
de6d9b64 905
ba6802de
MN
906 pix_abs16x16 = pix_abs16x16_c;
907 pix_abs16x16_x2 = pix_abs16x16_x2_c;
908 pix_abs16x16_y2 = pix_abs16x16_y2_c;
de6d9b64 909 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
ba6802de
MN
910 pix_abs8x8 = pix_abs8x8_c;
911 pix_abs8x8_x2 = pix_abs8x8_x2_c;
912 pix_abs8x8_y2 = pix_abs8x8_y2_c;
913 pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
de6d9b64
FB
914 av_fdct = jpeg_fdct_ifast;
915
c34270f5 916 use_permuted_idct = 1;
e0eac44e 917
980fc7b8 918#ifdef HAVE_MMX
de6d9b64
FB
919 dsputil_init_mmx();
920#endif
3d03c0a2
FB
921#ifdef ARCH_ARMV4L
922 dsputil_init_armv4l();
923#endif
c34270f5
FB
924#ifdef HAVE_MLIB
925 dsputil_init_mlib();
926 use_permuted_idct = 0;
927#endif
1e98dffb
NK
928#ifdef ARCH_ALPHA
929 dsputil_init_alpha();
930 use_permuted_idct = 0;
931#endif
c34270f5 932
d962f6fd
A
933#ifdef SIMPLE_IDCT
934 if(ff_idct == simple_idct) use_permuted_idct=0;
935#endif
936
5a240838
MN
937 if(use_permuted_idct)
938#ifdef SIMPLE_IDCT
939 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
940#else
941 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
942#endif
943 else
944 for(i=0; i<64; i++) permutation[i]=i;
945
2f349de2
MN
946 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
947 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
948
c34270f5
FB
949 if (use_permuted_idct) {
950 /* permute for IDCT */
951 for(i=0;i<64;i++) {
952 j = zigzag_direct[i];
953 zigzag_direct[i] = block_permute_op(j);
954 j = ff_alternate_horizontal_scan[i];
955 ff_alternate_horizontal_scan[i] = block_permute_op(j);
956 j = ff_alternate_vertical_scan[i];
957 ff_alternate_vertical_scan[i] = block_permute_op(j);
958 }
959 block_permute(default_intra_matrix);
960 block_permute(default_non_intra_matrix);
3bf43d42
MN
961 block_permute(ff_mpeg4_default_intra_matrix);
962 block_permute(ff_mpeg4_default_non_intra_matrix);
c34270f5 963 }
badaf88e
MN
964
965 build_zigzag_end();
de6d9b64 966}
43f1708f
J
967
968void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
969 int orig_linesize[3], int coded_linesize,
970 AVCodecContext *avctx)
971{
972 int quad, diff, x, y;
973 UINT8 *orig, *coded;
974 UINT32 *sq = squareTbl + 256;
975
976 quad = 0;
977 diff = 0;
978
979 /* Luminance */
980 orig = orig_image[0];
981 coded = coded_image[0];
982
983 for (y=0;y<avctx->height;y++) {
984 for (x=0;x<avctx->width;x++) {
985 diff = *(orig + x) - *(coded + x);
986 quad += sq[diff];
987 }
988 orig += orig_linesize[0];
989 coded += coded_linesize;
990 }
991
992 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
993
994 if (avctx->psnr_y) {
995 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
996 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
997 } else
998 avctx->psnr_y = 99.99;
999}
1000