print frame decoding time support (x86 only)
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
3 * Copyright (c) 2000, 2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7ff037e9
MN
18 *
19 * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
de6d9b64
FB
20 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "avcodec.h"
#include "dsputil.h"
#include "simple_idct.h"
de6d9b64 27
4af7bcc1 28void (*ff_idct)(DCTELEM *block);
de6d9b64 29void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
9dbcbd92 30void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
de6d9b64
FB
31void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
32void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
44eb4951 33void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
649c00c9 34void (*clear_blocks)(DCTELEM *blocks);
de6d9b64
FB
35
36op_pixels_abs_func pix_abs16x16;
37op_pixels_abs_func pix_abs16x16_x2;
38op_pixels_abs_func pix_abs16x16_y2;
39op_pixels_abs_func pix_abs16x16_xy2;
40
ba6802de
MN
41op_pixels_abs_func pix_abs8x8;
42op_pixels_abs_func pix_abs8x8_x2;
43op_pixels_abs_func pix_abs8x8_y2;
44op_pixels_abs_func pix_abs8x8_xy2;
45
0cfa9713 46UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
47UINT32 squareTbl[512];
48
e0eac44e
FB
49extern UINT16 default_intra_matrix[64];
50extern UINT16 default_non_intra_matrix[64];
3bf43d42
MN
51extern UINT16 ff_mpeg4_default_intra_matrix[64];
52extern UINT16 ff_mpeg4_default_non_intra_matrix[64];
e0eac44e
FB
53
54UINT8 zigzag_direct[64] = {
55 0, 1, 8, 16, 9, 2, 3, 10,
56 17, 24, 32, 25, 18, 11, 4, 5,
57 12, 19, 26, 33, 40, 48, 41, 34,
58 27, 20, 13, 6, 7, 14, 21, 28,
59 35, 42, 49, 56, 57, 50, 43, 36,
60 29, 22, 15, 23, 30, 37, 44, 51,
61 58, 59, 52, 45, 38, 31, 39, 46,
62 53, 60, 61, 54, 47, 55, 62, 63
63};
64
2f349de2
MN
65/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
66UINT16 __align8 inv_zigzag_direct16[64];
67
68/* not permutated zigzag_direct for MMX quantizer */
69UINT8 zigzag_direct_noperm[64];
70
e0eac44e
FB
71UINT8 ff_alternate_horizontal_scan[64] = {
72 0, 1, 2, 3, 8, 9, 16, 17,
73 10, 11, 4, 5, 6, 7, 15, 14,
74 13, 12, 19, 18, 24, 25, 32, 33,
75 26, 27, 20, 21, 22, 23, 28, 29,
76 30, 31, 34, 35, 40, 41, 48, 49,
77 42, 43, 36, 37, 38, 39, 44, 45,
78 46, 47, 50, 51, 56, 57, 58, 59,
79 52, 53, 54, 55, 60, 61, 62, 63,
80};
81
82UINT8 ff_alternate_vertical_scan[64] = {
83 0, 8, 16, 24, 1, 9, 2, 10,
84 17, 25, 32, 40, 48, 56, 57, 49,
85 41, 33, 26, 18, 3, 11, 4, 12,
86 19, 27, 34, 42, 50, 58, 35, 43,
87 51, 59, 20, 28, 5, 13, 6, 14,
88 21, 29, 36, 44, 52, 60, 37, 45,
89 53, 61, 22, 30, 7, 15, 23, 31,
90 38, 46, 54, 62, 39, 47, 55, 63,
91};
92
e4986da9
J
#ifdef SIMPLE_IDCT

/*
 * Coefficient input order expected by simple_idct_mmx.  dsputil_init()
 * copies this into permutation[] when the permuted IDCT path is active.
 */
static UINT8 simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
#endif
5a240838 107
2f349de2
MN
108/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
109UINT32 inverse[256]={
110 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
111 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
112 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
113 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
114 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
115 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
116 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
117 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
118 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
119 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
120 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
121 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
122 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
123 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
124 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
125 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
126 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
127 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
128 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
129 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
130 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
131 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
132 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
133 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
134 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
135 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
136 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
137 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
138 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
139 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
140 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
141 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
142};
143
badaf88e
MN
144/* used to skip zeros at the end */
145UINT8 zigzag_end[64];
146
5a240838
MN
147UINT8 permutation[64];
148//UINT8 invPermutation[64];
149
badaf88e
MN
150static void build_zigzag_end()
151{
152 int lastIndex;
153 int lastIndexAfterPerm=0;
154 for(lastIndex=0; lastIndex<64; lastIndex++)
155 {
156 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
157 lastIndexAfterPerm= zigzag_direct[lastIndex];
158 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
159 }
160}
161
de6d9b64
FB
162void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
163{
164 DCTELEM *p;
165 const UINT8 *pix;
166 int i;
167
168 /* read the pixels */
169 p = block;
170 pix = pixels;
171 for(i=0;i<8;i++) {
172 p[0] = pix[0];
173 p[1] = pix[1];
174 p[2] = pix[2];
175 p[3] = pix[3];
176 p[4] = pix[4];
177 p[5] = pix[5];
178 p[6] = pix[6];
179 p[7] = pix[7];
180 pix += line_size;
181 p += 8;
182 }
183}
184
9dbcbd92
MN
185void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){
186 DCTELEM *p;
187 int i;
188
189 /* read the pixels */
190 p = block;
191 for(i=0;i<8;i++) {
192 p[0] = s1[0] - s2[0];
193 p[1] = s1[1] - s2[1];
194 p[2] = s1[2] - s2[2];
195 p[3] = s1[3] - s2[3];
196 p[4] = s1[4] - s2[4];
197 p[5] = s1[5] - s2[5];
198 p[6] = s1[6] - s2[6];
199 p[7] = s1[7] - s2[7];
200 s1 += stride;
201 s2 += stride;
202 p += 8;
203 }
204}
205
206
de6d9b64
FB
207void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
208{
209 const DCTELEM *p;
210 UINT8 *pix;
211 int i;
212 UINT8 *cm = cropTbl + MAX_NEG_CROP;
213
214 /* read the pixels */
215 p = block;
216 pix = pixels;
217 for(i=0;i<8;i++) {
218 pix[0] = cm[p[0]];
219 pix[1] = cm[p[1]];
220 pix[2] = cm[p[2]];
221 pix[3] = cm[p[3]];
222 pix[4] = cm[p[4]];
223 pix[5] = cm[p[5]];
224 pix[6] = cm[p[6]];
225 pix[7] = cm[p[7]];
226 pix += line_size;
227 p += 8;
228 }
229}
230
231void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
232{
233 const DCTELEM *p;
234 UINT8 *pix;
235 int i;
236 UINT8 *cm = cropTbl + MAX_NEG_CROP;
237
238 /* read the pixels */
239 p = block;
240 pix = pixels;
241 for(i=0;i<8;i++) {
242 pix[0] = cm[pix[0] + p[0]];
243 pix[1] = cm[pix[1] + p[1]];
244 pix[2] = cm[pix[2] + p[2]];
245 pix[3] = cm[pix[3] + p[3]];
246 pix[4] = cm[pix[4] + p[4]];
247 pix[5] = cm[pix[5] + p[5]];
248 pix[6] = cm[pix[6] + p[6]];
249 pix[7] = cm[pix[7] + p[7]];
250 pix += line_size;
251 p += 8;
252 }
253}
254
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) expands to four static kernels that process
 * h rows of 8 pixels, plus the public table OPNAME_pixels_tab[4]:
 *
 *   [0] OPNAME_pixels      OP(dst, src)
 *   [1] OPNAME_pixels_x2   OP(dst, avg2(src, right neighbour))
 *   [2] OPNAME_pixels_y2   OP(dst, avg2(src, pixel one line below))
 *   [3] OPNAME_pixels_xy2  OP(dst, avg4 of the 2x2 neighbourhood)
 *
 * BTYPE is the destination element type and INCR the destination row step.
 * avg2/avg4 are resolved where PIXOP is invoked, so the rounding mode is
 * whatever those macros are defined as at that point.  The row loops are
 * do/while: h must be at least 1.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                                        \
                                                                                              \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h)       \
{                                                                                             \
    BTYPE *out = block;                                                                       \
    const UINT8 *in = pixels;                                                                 \
    int col;                                                                                  \
                                                                                              \
    do {                                                                                      \
        for (col = 0; col < 8; col++)                                                         \
            OP(out[col], in[col]);                                                            \
        in  += line_size;                                                                     \
        out += INCR;                                                                          \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
{                                                                                             \
    BTYPE *out = block;                                                                       \
    const UINT8 *in = pixels;                                                                 \
    int col;                                                                                  \
                                                                                              \
    do {                                                                                      \
        for (col = 0; col < 8; col++)                                                         \
            OP(out[col], avg2(in[col], in[col + 1]));                                         \
        in  += line_size;                                                                     \
        out += INCR;                                                                          \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
{                                                                                             \
    BTYPE *out = block;                                                                       \
    const UINT8 *in = pixels;                                                                 \
    const UINT8 *below = pixels + line_size;                                                  \
    int col;                                                                                  \
                                                                                              \
    do {                                                                                      \
        for (col = 0; col < 8; col++)                                                         \
            OP(out[col], avg2(in[col], below[col]));                                          \
        in    += line_size;                                                                   \
        below += line_size;                                                                   \
        out   += INCR;                                                                        \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h)   \
{                                                                                             \
    BTYPE *out = block;                                                                       \
    const UINT8 *in = pixels;                                                                 \
    const UINT8 *below = pixels + line_size;                                                  \
    int col;                                                                                  \
                                                                                              \
    do {                                                                                      \
        for (col = 0; col < 8; col++)                                                         \
            OP(out[col], avg4(in[col], in[col + 1], below[col], below[col + 1]));             \
        in    += line_size;                                                                   \
        below += line_size;                                                                   \
        out   += INCR;                                                                        \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                                        \
    OPNAME ## _pixels_x2,                                                                     \
    OPNAME ## _pixels_y2,                                                                     \
    OPNAME ## _pixels_xy2,                                                                    \
};
353
354
355/* rounding primitives */
356#define avg2(a,b) ((a+b+1)>>1)
357#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
358
359#define op_put(a, b) a = b
360#define op_avg(a, b) a = avg2(a, b)
361#define op_sub(a, b) a -= b
362
363PIXOP(UINT8, put, op_put, line_size)
364PIXOP(UINT8, avg, op_avg, line_size)
365
366PIXOP(DCTELEM, sub, op_sub, 8)
367
368/* not rounding primitives */
369#undef avg2
370#undef avg4
371#define avg2(a,b) ((a+b)>>1)
372#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
373
374PIXOP(UINT8, put_no_rnd, op_put, line_size)
375PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
376
377/* motion estimation */
378
379#undef avg2
380#undef avg4
381#define avg2(a,b) ((a+b+1)>>1)
382#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
383
44eb4951
MN
384static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
385{
386 const int A=(16-x16)*(16-y16);
387 const int B=( x16)*(16-y16);
388 const int C=(16-x16)*( y16);
389 const int D=( x16)*( y16);
390 int i;
391 rounder= 128 - rounder;
392
393 for(i=0; i<h; i++)
394 {
395 dst[0]= (A*src[0] + B*src[1] + C*src[srcStride+0] + D*src[srcStride+1] + rounder)>>8;
396 dst[1]= (A*src[1] + B*src[2] + C*src[srcStride+1] + D*src[srcStride+2] + rounder)>>8;
397 dst[2]= (A*src[2] + B*src[3] + C*src[srcStride+2] + D*src[srcStride+3] + rounder)>>8;
398 dst[3]= (A*src[3] + B*src[4] + C*src[srcStride+3] + D*src[srcStride+4] + rounder)>>8;
399 dst[4]= (A*src[4] + B*src[5] + C*src[srcStride+4] + D*src[srcStride+5] + rounder)>>8;
400 dst[5]= (A*src[5] + B*src[6] + C*src[srcStride+5] + D*src[srcStride+6] + rounder)>>8;
401 dst[6]= (A*src[6] + B*src[7] + C*src[srcStride+6] + D*src[srcStride+7] + rounder)>>8;
402 dst[7]= (A*src[7] + B*src[8] + C*src[srcStride+7] + D*src[srcStride+8] + rounder)>>8;
403 dst+= srcStride;
404 src+= srcStride;
405 }
406}
407
408static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
409{
410 UINT8 *cm = cropTbl + MAX_NEG_CROP;
411 int i;
412 for(i=0; i<h; i++)
413 {
ba6802de
MN
414 dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)];
415 dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)];
416 dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)];
417 dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)];
418 dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)];
419 dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)];
420 dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)];
421 dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)];
44eb4951
MN
422 dst+=dstStride;
423 src+=srcStride;
424 }
425}
426
427static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
428{
429 UINT8 *cm = cropTbl + MAX_NEG_CROP;
430 int i;
431 for(i=0; i<w; i++)
432 {
433 const int src0= src[0*srcStride];
434 const int src1= src[1*srcStride];
435 const int src2= src[2*srcStride];
436 const int src3= src[3*srcStride];
437 const int src4= src[4*srcStride];
438 const int src5= src[5*srcStride];
439 const int src6= src[6*srcStride];
440 const int src7= src[7*srcStride];
441 const int src8= src[8*srcStride];
ba6802de
MN
442 dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)];
443 dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)];
444 dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)];
445 dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)];
446 dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)];
447 dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)];
448 dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)];
449 dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)];
44eb4951
MN
450 dst++;
451 src++;
452 }
453}
454
455static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
456{
457 int i;
458 for(i=0; i<8; i++)
459 {
460 dst[0]= src[0];
461 dst[1]= src[1];
462 dst[2]= src[2];
463 dst[3]= src[3];
464 dst[4]= src[4];
465 dst[5]= src[5];
466 dst[6]= src[6];
467 dst[7]= src[7];
468 dst+=dstStride;
469 src+=srcStride;
470 }
471}
472
473static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
474{
475 int i;
476 for(i=0; i<8; i++)
477 {
478 dst[0]= (src1[0] + src2[0] + r)>>1;
479 dst[1]= (src1[1] + src2[1] + r)>>1;
480 dst[2]= (src1[2] + src2[2] + r)>>1;
481 dst[3]= (src1[3] + src2[3] + r)>>1;
482 dst[4]= (src1[4] + src2[4] + r)>>1;
483 dst[5]= (src1[5] + src2[5] + r)>>1;
484 dst[6]= (src1[6] + src2[6] + r)>>1;
485 dst[7]= (src1[7] + src2[7] + r)>>1;
486 dst+=dstStride;
487 src1+=srcStride;
488 src2+=8;
489 }
490}
491
492static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
493{
494 int i;
495 for(i=0; i<8; i++)
496 {
497 dst[0]= (src1[0] + src2[0] + src3[0] + src4[0] + r)>>2;
498 dst[1]= (src1[1] + src2[1] + src3[1] + src4[1] + r)>>2;
499 dst[2]= (src1[2] + src2[2] + src3[2] + src4[2] + r)>>2;
500 dst[3]= (src1[3] + src2[3] + src3[3] + src4[3] + r)>>2;
501 dst[4]= (src1[4] + src2[4] + src3[4] + src4[4] + r)>>2;
502 dst[5]= (src1[5] + src2[5] + src3[5] + src4[5] + r)>>2;
503 dst[6]= (src1[6] + src2[6] + src3[6] + src4[6] + r)>>2;
504 dst[7]= (src1[7] + src2[7] + src3[7] + src4[7] + r)>>2;
505 dst+=dstStride;
506 src1+=srcStride;
507 src2+=8;
7ff037e9 508 src3+=8;
44eb4951
MN
509 src4+=8;
510 }
511}
512
513#define QPEL_MC(r, name) \
514static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
515{\
516 put_block(dst, src, dstStride, srcStride);\
517}\
518\
519static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
520{\
521 UINT8 half[64];\
ba6802de 522 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
523 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
524}\
525\
526static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
527{\
ba6802de 528 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
44eb4951
MN
529}\
530\
531static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
532{\
533 UINT8 half[64];\
ba6802de 534 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
535 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
536}\
537\
538static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
539{\
540 UINT8 half[64];\
ba6802de 541 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
542 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
543}\
544\
545static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
546{\
ba6802de 547 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
44eb4951
MN
548}\
549\
550static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
551{\
552 UINT8 half[64];\
ba6802de 553 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
554 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
555}\
556static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
557{\
558 UINT8 halfH[72];\
7ff037e9 559 UINT8 halfV[64];\
44eb4951 560 UINT8 halfHV[64];\
ba6802de
MN
561 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
562 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
563 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
564 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
565}\
566static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
567{\
568 UINT8 halfH[72];\
7ff037e9 569 UINT8 halfV[64];\
44eb4951 570 UINT8 halfHV[64];\
ba6802de
MN
571 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
572 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
573 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
574 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
575}\
576static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
577{\
578 UINT8 halfH[72];\
7ff037e9 579 UINT8 halfV[64];\
44eb4951 580 UINT8 halfHV[64];\
ba6802de
MN
581 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
582 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
583 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 584 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
585}\
586static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
587{\
588 UINT8 halfH[72];\
7ff037e9 589 UINT8 halfV[64];\
44eb4951 590 UINT8 halfHV[64];\
ba6802de
MN
591 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
592 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
593 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 594 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
595}\
596static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
597{\
598 UINT8 halfH[72];\
599 UINT8 halfHV[64];\
ba6802de
MN
600 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
601 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
602 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
603}\
604static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
605{\
606 UINT8 halfH[72];\
607 UINT8 halfHV[64];\
ba6802de
MN
608 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
609 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
610 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
611}\
612static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
613{\
614 UINT8 halfH[72];\
7ff037e9 615 UINT8 halfV[64];\
44eb4951 616 UINT8 halfHV[64];\
ba6802de
MN
617 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
618 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
619 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 620 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
621}\
622static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
623{\
624 UINT8 halfH[72];\
7ff037e9 625 UINT8 halfV[64];\
44eb4951 626 UINT8 halfHV[64];\
ba6802de
MN
627 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
628 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
629 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 630 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
631}\
632static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
633{\
634 UINT8 halfH[72];\
ba6802de
MN
635 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
636 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
44eb4951
MN
637}\
638qpel_mc_func qpel_mc ## name ## _tab[16]={ \
639 qpel_mc00_c ## name, \
640 qpel_mc10_c ## name, \
641 qpel_mc20_c ## name, \
642 qpel_mc30_c ## name, \
643 qpel_mc01_c ## name, \
644 qpel_mc11_c ## name, \
645 qpel_mc21_c ## name, \
646 qpel_mc31_c ## name, \
647 qpel_mc02_c ## name, \
648 qpel_mc12_c ## name, \
649 qpel_mc22_c ## name, \
650 qpel_mc32_c ## name, \
651 qpel_mc03_c ## name, \
652 qpel_mc13_c ## name, \
653 qpel_mc23_c ## name, \
654 qpel_mc33_c ## name, \
655};
656
657QPEL_MC(0, _rnd)
658QPEL_MC(1, _no_rnd)
659
ba6802de 660int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
661{
662 int s, i;
663
664 s = 0;
ba6802de 665 for(i=0;i<16;i++) {
de6d9b64
FB
666 s += abs(pix1[0] - pix2[0]);
667 s += abs(pix1[1] - pix2[1]);
668 s += abs(pix1[2] - pix2[2]);
669 s += abs(pix1[3] - pix2[3]);
670 s += abs(pix1[4] - pix2[4]);
671 s += abs(pix1[5] - pix2[5]);
672 s += abs(pix1[6] - pix2[6]);
673 s += abs(pix1[7] - pix2[7]);
674 s += abs(pix1[8] - pix2[8]);
675 s += abs(pix1[9] - pix2[9]);
676 s += abs(pix1[10] - pix2[10]);
677 s += abs(pix1[11] - pix2[11]);
678 s += abs(pix1[12] - pix2[12]);
679 s += abs(pix1[13] - pix2[13]);
680 s += abs(pix1[14] - pix2[14]);
681 s += abs(pix1[15] - pix2[15]);
682 pix1 += line_size;
683 pix2 += line_size;
684 }
685 return s;
686}
687
ba6802de 688int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
689{
690 int s, i;
691
692 s = 0;
ba6802de 693 for(i=0;i<16;i++) {
de6d9b64
FB
694 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
695 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
696 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
697 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
698 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
699 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
700 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
701 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
702 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
703 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
704 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
705 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
706 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
707 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
708 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
709 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
710 pix1 += line_size;
711 pix2 += line_size;
712 }
713 return s;
714}
715
ba6802de 716int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
717{
718 int s, i;
719 UINT8 *pix3 = pix2 + line_size;
720
721 s = 0;
ba6802de 722 for(i=0;i<16;i++) {
de6d9b64
FB
723 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
724 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
725 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
726 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
727 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
728 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
729 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
730 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
731 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
732 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
733 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
734 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
735 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
736 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
737 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
738 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
739 pix1 += line_size;
740 pix2 += line_size;
741 pix3 += line_size;
742 }
743 return s;
744}
745
ba6802de 746int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
747{
748 int s, i;
749 UINT8 *pix3 = pix2 + line_size;
750
751 s = 0;
ba6802de 752 for(i=0;i<16;i++) {
de6d9b64
FB
753 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
754 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
755 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
756 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
757 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
758 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
759 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
760 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
761 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
762 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
763 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
764 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
765 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
766 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
767 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
768 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
769 pix1 += line_size;
770 pix2 += line_size;
771 pix3 += line_size;
772 }
773 return s;
774}
775
ba6802de
MN
776int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
777{
778 int s, i;
779
780 s = 0;
781 for(i=0;i<8;i++) {
782 s += abs(pix1[0] - pix2[0]);
783 s += abs(pix1[1] - pix2[1]);
784 s += abs(pix1[2] - pix2[2]);
785 s += abs(pix1[3] - pix2[3]);
786 s += abs(pix1[4] - pix2[4]);
787 s += abs(pix1[5] - pix2[5]);
788 s += abs(pix1[6] - pix2[6]);
789 s += abs(pix1[7] - pix2[7]);
790 pix1 += line_size;
791 pix2 += line_size;
792 }
793 return s;
794}
795
796int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
797{
798 int s, i;
799
800 s = 0;
801 for(i=0;i<8;i++) {
802 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
803 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
804 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
805 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
806 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
807 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
808 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
809 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
810 pix1 += line_size;
811 pix2 += line_size;
812 }
813 return s;
814}
815
816int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
817{
818 int s, i;
819 UINT8 *pix3 = pix2 + line_size;
820
821 s = 0;
822 for(i=0;i<8;i++) {
823 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
824 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
825 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
826 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
827 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
828 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
829 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
830 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
831 pix1 += line_size;
832 pix2 += line_size;
833 pix3 += line_size;
834 }
835 return s;
836}
837
838int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
839{
840 int s, i;
841 UINT8 *pix3 = pix2 + line_size;
842
843 s = 0;
844 for(i=0;i<8;i++) {
845 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
846 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
847 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
848 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
849 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
850 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
851 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
852 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
853 pix1 += line_size;
854 pix2 += line_size;
855 pix3 += line_size;
856 }
857 return s;
858}
859
e0eac44e
FB
860/* permute block according so that it corresponds to the MMX idct
861 order */
d962f6fd 862#ifdef SIMPLE_IDCT
5a240838 863 /* general permutation, but perhaps slightly slower */
d962f6fd
A
864void block_permute(INT16 *block)
865{
866 int i;
867 INT16 temp[64];
868
d962f6fd
A
869 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
870
871 for(i=0; i<64; i++) block[i] = temp[i];
d962f6fd 872}
d962f6fd
A
873#else
874
e0eac44e 875void block_permute(INT16 *block)
de6d9b64 876{
e0eac44e 877 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
de6d9b64
FB
878 int i;
879
e0eac44e
FB
880 for(i=0;i<8;i++) {
881 tmp1 = block[1];
882 tmp2 = block[2];
883 tmp3 = block[3];
884 tmp4 = block[4];
885 tmp5 = block[5];
886 tmp6 = block[6];
887 block[1] = tmp2;
888 block[2] = tmp4;
889 block[3] = tmp6;
890 block[4] = tmp1;
891 block[5] = tmp3;
892 block[6] = tmp5;
893 block += 8;
894 }
895}
d962f6fd 896#endif
e0eac44e 897
649c00c9
MN
898void clear_blocks_c(DCTELEM *blocks)
899{
900 memset(blocks, 0, sizeof(DCTELEM)*6*64);
901}
902
e0eac44e
FB
903void dsputil_init(void)
904{
905 int i, j;
c34270f5 906 int use_permuted_idct;
e0eac44e 907
de6d9b64
FB
908 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
909 for(i=0;i<MAX_NEG_CROP;i++) {
910 cropTbl[i] = 0;
911 cropTbl[i + MAX_NEG_CROP + 256] = 255;
912 }
913
914 for(i=0;i<512;i++) {
915 squareTbl[i] = (i - 256) * (i - 256);
916 }
917
d962f6fd
A
918#ifdef SIMPLE_IDCT
919 ff_idct = simple_idct;
920#else
4af7bcc1 921 ff_idct = j_rev_dct;
d962f6fd 922#endif
de6d9b64 923 get_pixels = get_pixels_c;
9dbcbd92 924 diff_pixels = diff_pixels_c;
de6d9b64
FB
925 put_pixels_clamped = put_pixels_clamped_c;
926 add_pixels_clamped = add_pixels_clamped_c;
44eb4951 927 gmc1= gmc1_c;
649c00c9 928 clear_blocks= clear_blocks_c;
de6d9b64 929
ba6802de
MN
930 pix_abs16x16 = pix_abs16x16_c;
931 pix_abs16x16_x2 = pix_abs16x16_x2_c;
932 pix_abs16x16_y2 = pix_abs16x16_y2_c;
de6d9b64 933 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
ba6802de
MN
934 pix_abs8x8 = pix_abs8x8_c;
935 pix_abs8x8_x2 = pix_abs8x8_x2_c;
936 pix_abs8x8_y2 = pix_abs8x8_y2_c;
937 pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
de6d9b64
FB
938 av_fdct = jpeg_fdct_ifast;
939
c34270f5 940 use_permuted_idct = 1;
e0eac44e 941
980fc7b8 942#ifdef HAVE_MMX
de6d9b64
FB
943 dsputil_init_mmx();
944#endif
3d03c0a2
FB
945#ifdef ARCH_ARMV4L
946 dsputil_init_armv4l();
947#endif
c34270f5
FB
948#ifdef HAVE_MLIB
949 dsputil_init_mlib();
950 use_permuted_idct = 0;
951#endif
1e98dffb
NK
952#ifdef ARCH_ALPHA
953 dsputil_init_alpha();
954 use_permuted_idct = 0;
955#endif
c34270f5 956
d962f6fd
A
957#ifdef SIMPLE_IDCT
958 if(ff_idct == simple_idct) use_permuted_idct=0;
959#endif
960
5a240838
MN
961 if(use_permuted_idct)
962#ifdef SIMPLE_IDCT
963 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
964#else
965 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
966#endif
967 else
968 for(i=0; i<64; i++) permutation[i]=i;
969
2f349de2
MN
970 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
971 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
972
c34270f5
FB
973 if (use_permuted_idct) {
974 /* permute for IDCT */
975 for(i=0;i<64;i++) {
976 j = zigzag_direct[i];
977 zigzag_direct[i] = block_permute_op(j);
978 j = ff_alternate_horizontal_scan[i];
979 ff_alternate_horizontal_scan[i] = block_permute_op(j);
980 j = ff_alternate_vertical_scan[i];
981 ff_alternate_vertical_scan[i] = block_permute_op(j);
982 }
983 block_permute(default_intra_matrix);
984 block_permute(default_non_intra_matrix);
3bf43d42
MN
985 block_permute(ff_mpeg4_default_intra_matrix);
986 block_permute(ff_mpeg4_default_non_intra_matrix);
c34270f5 987 }
badaf88e
MN
988
989 build_zigzag_end();
de6d9b64 990}
43f1708f
J
991
992void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
993 int orig_linesize[3], int coded_linesize,
994 AVCodecContext *avctx)
995{
996 int quad, diff, x, y;
997 UINT8 *orig, *coded;
998 UINT32 *sq = squareTbl + 256;
999
1000 quad = 0;
1001 diff = 0;
1002
1003 /* Luminance */
1004 orig = orig_image[0];
1005 coded = coded_image[0];
1006
1007 for (y=0;y<avctx->height;y++) {
1008 for (x=0;x<avctx->width;x++) {
1009 diff = *(orig + x) - *(coded + x);
1010 quad += sq[diff];
1011 }
1012 orig += orig_linesize[0];
1013 coded += coded_linesize;
1014 }
1015
1016 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
1017
1018 if (avctx->psnr_y) {
1019 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
1020 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
1021 } else
1022 avctx->psnr_y = 99.99;
1023}
1024