mpeg4 4MV encoding
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
3 * Copyright (c) 2000, 2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7ff037e9
MN
18 *
19 * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
de6d9b64
FB
20 */
21#include <stdlib.h>
22#include <stdio.h>
43f1708f 23#include <math.h>
de6d9b64
FB
24#include "avcodec.h"
25#include "dsputil.h"
d962f6fd 26#include "simple_idct.h"
de6d9b64 27
4af7bcc1 28void (*ff_idct)(DCTELEM *block);
de6d9b64
FB
29void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
30void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
31void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
44eb4951 32void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
de6d9b64
FB
33
34op_pixels_abs_func pix_abs16x16;
35op_pixels_abs_func pix_abs16x16_x2;
36op_pixels_abs_func pix_abs16x16_y2;
37op_pixels_abs_func pix_abs16x16_xy2;
38
ba6802de
MN
39op_pixels_abs_func pix_abs8x8;
40op_pixels_abs_func pix_abs8x8_x2;
41op_pixels_abs_func pix_abs8x8_y2;
42op_pixels_abs_func pix_abs8x8_xy2;
43
0cfa9713 44UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
45UINT32 squareTbl[512];
46
e0eac44e
FB
47extern UINT16 default_intra_matrix[64];
48extern UINT16 default_non_intra_matrix[64];
49
50UINT8 zigzag_direct[64] = {
51 0, 1, 8, 16, 9, 2, 3, 10,
52 17, 24, 32, 25, 18, 11, 4, 5,
53 12, 19, 26, 33, 40, 48, 41, 34,
54 27, 20, 13, 6, 7, 14, 21, 28,
55 35, 42, 49, 56, 57, 50, 43, 36,
56 29, 22, 15, 23, 30, 37, 44, 51,
57 58, 59, 52, 45, 38, 31, 39, 46,
58 53, 60, 61, 54, 47, 55, 62, 63
59};
60
2f349de2
MN
61/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
62UINT16 __align8 inv_zigzag_direct16[64];
63
64/* not permutated zigzag_direct for MMX quantizer */
65UINT8 zigzag_direct_noperm[64];
66
e0eac44e
FB
67UINT8 ff_alternate_horizontal_scan[64] = {
68 0, 1, 2, 3, 8, 9, 16, 17,
69 10, 11, 4, 5, 6, 7, 15, 14,
70 13, 12, 19, 18, 24, 25, 32, 33,
71 26, 27, 20, 21, 22, 23, 28, 29,
72 30, 31, 34, 35, 40, 41, 48, 49,
73 42, 43, 36, 37, 38, 39, 44, 45,
74 46, 47, 50, 51, 56, 57, 58, 59,
75 52, 53, 54, 55, 60, 61, 62, 63,
76};
77
78UINT8 ff_alternate_vertical_scan[64] = {
79 0, 8, 16, 24, 1, 9, 2, 10,
80 17, 25, 32, 40, 48, 56, 57, 49,
81 41, 33, 26, 18, 3, 11, 4, 12,
82 19, 27, 34, 42, 50, 58, 35, 43,
83 51, 59, 20, 28, 5, 13, 6, 14,
84 21, 29, 36, 44, 52, 60, 37, 45,
85 53, 61, 22, 30, 7, 15, 23, 31,
86 38, 46, 54, 62, 39, 47, 55, 63,
87};
88
0a8d8945 89/* Input permutation for the simple_idct_mmx */
5a240838 90static UINT8 simple_mmx_permutation[64]={
0a8d8945
MN
91 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
92 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
93 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
94 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
95 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
96 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
97 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
98 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
5a240838
MN
99};
100
2f349de2
MN
101/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
/* Fixed-point reciprocal table, indexed by the divisor b.
   NOTE(review): for b >= 2 the entries look like ceil(2^32 / b)
   (inverse[2] == 2^31, inverse[3] == 1431655766); entry 0 is 0 and
   entry 1 is 2^32 - 1, which is why the identity is only promised for
   b >= 2.  Confirm before regenerating the table. */
102UINT32 inverse[256]={
103 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
104 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
105 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
106 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
107 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
108 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
109 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
110 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
111 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
112 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
113 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
114 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
115 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
116 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
117 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
118 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
119 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
120 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
121 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
122 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
123 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
124 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
125 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
126 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
127 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
128 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
129 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
130 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
131 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
132 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
133 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
134 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
135};
136
badaf88e
MN
137/* used to skip zeros at the end */
138UINT8 zigzag_end[64];
139
5a240838
MN
140UINT8 permutation[64];
141//UINT8 invPermutation[64];
142
badaf88e
MN
143static void build_zigzag_end()
144{
145 int lastIndex;
146 int lastIndexAfterPerm=0;
147 for(lastIndex=0; lastIndex<64; lastIndex++)
148 {
149 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
150 lastIndexAfterPerm= zigzag_direct[lastIndex];
151 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
152 }
153}
154
de6d9b64
FB
155void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
156{
157 DCTELEM *p;
158 const UINT8 *pix;
159 int i;
160
161 /* read the pixels */
162 p = block;
163 pix = pixels;
164 for(i=0;i<8;i++) {
165 p[0] = pix[0];
166 p[1] = pix[1];
167 p[2] = pix[2];
168 p[3] = pix[3];
169 p[4] = pix[4];
170 p[5] = pix[5];
171 p[6] = pix[6];
172 p[7] = pix[7];
173 pix += line_size;
174 p += 8;
175 }
176}
177
178void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
179{
180 const DCTELEM *p;
181 UINT8 *pix;
182 int i;
183 UINT8 *cm = cropTbl + MAX_NEG_CROP;
184
185 /* read the pixels */
186 p = block;
187 pix = pixels;
188 for(i=0;i<8;i++) {
189 pix[0] = cm[p[0]];
190 pix[1] = cm[p[1]];
191 pix[2] = cm[p[2]];
192 pix[3] = cm[p[3]];
193 pix[4] = cm[p[4]];
194 pix[5] = cm[p[5]];
195 pix[6] = cm[p[6]];
196 pix[7] = cm[p[7]];
197 pix += line_size;
198 p += 8;
199 }
200}
201
202void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
203{
204 const DCTELEM *p;
205 UINT8 *pix;
206 int i;
207 UINT8 *cm = cropTbl + MAX_NEG_CROP;
208
209 /* read the pixels */
210 p = block;
211 pix = pixels;
212 for(i=0;i<8;i++) {
213 pix[0] = cm[pix[0] + p[0]];
214 pix[1] = cm[pix[1] + p[1]];
215 pix[2] = cm[pix[2] + p[2]];
216 pix[3] = cm[pix[3] + p[3]];
217 pix[4] = cm[pix[4] + p[4]];
218 pix[5] = cm[pix[5] + p[5]];
219 pix[6] = cm[pix[6] + p[6]];
220 pix[7] = cm[pix[7] + p[7]];
221 pix += line_size;
222 p += 8;
223 }
224}
225
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions working
 * on a strip 8 pixels wide and h rows high, and a table listing them:
 *
 *   OPNAME_pixels      source taken as is
 *   OPNAME_pixels_x2   source averaged with its right neighbour (avg2)
 *   OPNAME_pixels_y2   source averaged with the pixel one row below (avg2)
 *   OPNAME_pixels_xy2  average of the 2x2 neighbourhood (avg4)
 *
 * BTYPE is the destination element type, OP(dst, value) combines the
 * computed value into the destination and INCR is the destination row
 * increment (it may name the line_size parameter).  avg2/avg4 are
 * whatever macros are in effect where PIXOP is instantiated.  Rows are
 * processed in a do/while loop, so h must be at least 1.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
\
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], pixels[col]); \
        } \
        pixels += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], avg2(pixels[col], pixels[col + 1])); \
        } \
        pixels += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    const UINT8 *below = pixels + line_size; \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], avg2(pixels[col], below[col])); \
        } \
        pixels += line_size; \
        below += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    const UINT8 *below = pixels + line_size; \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], avg4(pixels[col], pixels[col + 1], below[col], below[col + 1])); \
        } \
        pixels += line_size; \
        below += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels, \
    OPNAME ## _pixels_x2, \
    OPNAME ## _pixels_y2, \
    OPNAME ## _pixels_xy2, \
};
324
325
326/* rounding primitives */
327#define avg2(a,b) ((a+b+1)>>1)
328#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
329
330#define op_put(a, b) a = b
331#define op_avg(a, b) a = avg2(a, b)
332#define op_sub(a, b) a -= b
333
334PIXOP(UINT8, put, op_put, line_size)
335PIXOP(UINT8, avg, op_avg, line_size)
336
337PIXOP(DCTELEM, sub, op_sub, 8)
338
339/* not rounding primitives */
340#undef avg2
341#undef avg4
342#define avg2(a,b) ((a+b)>>1)
343#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
344
345PIXOP(UINT8, put_no_rnd, op_put, line_size)
346PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
347
348/* motion estimation */
349
350#undef avg2
351#undef avg4
352#define avg2(a,b) ((a+b+1)>>1)
353#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
354
44eb4951
MN
355static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
356{
357 const int A=(16-x16)*(16-y16);
358 const int B=( x16)*(16-y16);
359 const int C=(16-x16)*( y16);
360 const int D=( x16)*( y16);
361 int i;
362 rounder= 128 - rounder;
363
364 for(i=0; i<h; i++)
365 {
366 dst[0]= (A*src[0] + B*src[1] + C*src[srcStride+0] + D*src[srcStride+1] + rounder)>>8;
367 dst[1]= (A*src[1] + B*src[2] + C*src[srcStride+1] + D*src[srcStride+2] + rounder)>>8;
368 dst[2]= (A*src[2] + B*src[3] + C*src[srcStride+2] + D*src[srcStride+3] + rounder)>>8;
369 dst[3]= (A*src[3] + B*src[4] + C*src[srcStride+3] + D*src[srcStride+4] + rounder)>>8;
370 dst[4]= (A*src[4] + B*src[5] + C*src[srcStride+4] + D*src[srcStride+5] + rounder)>>8;
371 dst[5]= (A*src[5] + B*src[6] + C*src[srcStride+5] + D*src[srcStride+6] + rounder)>>8;
372 dst[6]= (A*src[6] + B*src[7] + C*src[srcStride+6] + D*src[srcStride+7] + rounder)>>8;
373 dst[7]= (A*src[7] + B*src[8] + C*src[srcStride+7] + D*src[srcStride+8] + rounder)>>8;
374 dst+= srcStride;
375 src+= srcStride;
376 }
377}
378
379static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
380{
381 UINT8 *cm = cropTbl + MAX_NEG_CROP;
382 int i;
383 for(i=0; i<h; i++)
384 {
ba6802de
MN
385 dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)];
386 dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)];
387 dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)];
388 dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)];
389 dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)];
390 dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)];
391 dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)];
392 dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)];
44eb4951
MN
393 dst+=dstStride;
394 src+=srcStride;
395 }
396}
397
398static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
399{
400 UINT8 *cm = cropTbl + MAX_NEG_CROP;
401 int i;
402 for(i=0; i<w; i++)
403 {
404 const int src0= src[0*srcStride];
405 const int src1= src[1*srcStride];
406 const int src2= src[2*srcStride];
407 const int src3= src[3*srcStride];
408 const int src4= src[4*srcStride];
409 const int src5= src[5*srcStride];
410 const int src6= src[6*srcStride];
411 const int src7= src[7*srcStride];
412 const int src8= src[8*srcStride];
ba6802de
MN
413 dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)];
414 dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)];
415 dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)];
416 dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)];
417 dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)];
418 dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)];
419 dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)];
420 dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)];
44eb4951
MN
421 dst++;
422 src++;
423 }
424}
425
426static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
427{
428 int i;
429 for(i=0; i<8; i++)
430 {
431 dst[0]= src[0];
432 dst[1]= src[1];
433 dst[2]= src[2];
434 dst[3]= src[3];
435 dst[4]= src[4];
436 dst[5]= src[5];
437 dst[6]= src[6];
438 dst[7]= src[7];
439 dst+=dstStride;
440 src+=srcStride;
441 }
442}
443
444static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
445{
446 int i;
447 for(i=0; i<8; i++)
448 {
449 dst[0]= (src1[0] + src2[0] + r)>>1;
450 dst[1]= (src1[1] + src2[1] + r)>>1;
451 dst[2]= (src1[2] + src2[2] + r)>>1;
452 dst[3]= (src1[3] + src2[3] + r)>>1;
453 dst[4]= (src1[4] + src2[4] + r)>>1;
454 dst[5]= (src1[5] + src2[5] + r)>>1;
455 dst[6]= (src1[6] + src2[6] + r)>>1;
456 dst[7]= (src1[7] + src2[7] + r)>>1;
457 dst+=dstStride;
458 src1+=srcStride;
459 src2+=8;
460 }
461}
462
463static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
464{
465 int i;
466 for(i=0; i<8; i++)
467 {
468 dst[0]= (src1[0] + src2[0] + src3[0] + src4[0] + r)>>2;
469 dst[1]= (src1[1] + src2[1] + src3[1] + src4[1] + r)>>2;
470 dst[2]= (src1[2] + src2[2] + src3[2] + src4[2] + r)>>2;
471 dst[3]= (src1[3] + src2[3] + src3[3] + src4[3] + r)>>2;
472 dst[4]= (src1[4] + src2[4] + src3[4] + src4[4] + r)>>2;
473 dst[5]= (src1[5] + src2[5] + src3[5] + src4[5] + r)>>2;
474 dst[6]= (src1[6] + src2[6] + src3[6] + src4[6] + r)>>2;
475 dst[7]= (src1[7] + src2[7] + src3[7] + src4[7] + r)>>2;
476 dst+=dstStride;
477 src1+=srcStride;
478 src2+=8;
7ff037e9 479 src3+=8;
44eb4951
MN
480 src4+=8;
481 }
482}
483
484#define QPEL_MC(r, name) \
485static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
486{\
487 put_block(dst, src, dstStride, srcStride);\
488}\
489\
490static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
491{\
492 UINT8 half[64];\
ba6802de 493 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
494 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
495}\
496\
497static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
498{\
ba6802de 499 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
44eb4951
MN
500}\
501\
502static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
503{\
504 UINT8 half[64];\
ba6802de 505 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
506 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
507}\
508\
509static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
510{\
511 UINT8 half[64];\
ba6802de 512 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
513 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
514}\
515\
516static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
517{\
ba6802de 518 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
44eb4951
MN
519}\
520\
521static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
522{\
523 UINT8 half[64];\
ba6802de 524 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
44eb4951
MN
525 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
526}\
527static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
528{\
529 UINT8 halfH[72];\
7ff037e9 530 UINT8 halfV[64];\
44eb4951 531 UINT8 halfHV[64];\
ba6802de
MN
532 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
533 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
534 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
535 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
536}\
537static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
538{\
539 UINT8 halfH[72];\
7ff037e9 540 UINT8 halfV[64];\
44eb4951 541 UINT8 halfHV[64];\
ba6802de
MN
542 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
543 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
544 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
545 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
546}\
547static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
548{\
549 UINT8 halfH[72];\
7ff037e9 550 UINT8 halfV[64];\
44eb4951 551 UINT8 halfHV[64];\
ba6802de
MN
552 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
553 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
554 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 555 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
556}\
557static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
558{\
559 UINT8 halfH[72];\
7ff037e9 560 UINT8 halfV[64];\
44eb4951 561 UINT8 halfHV[64];\
ba6802de
MN
562 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
563 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
564 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 565 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
566}\
567static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
568{\
569 UINT8 halfH[72];\
570 UINT8 halfHV[64];\
ba6802de
MN
571 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
572 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
573 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
574}\
575static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
576{\
577 UINT8 halfH[72];\
578 UINT8 halfHV[64];\
ba6802de
MN
579 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
580 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
44eb4951
MN
581 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
582}\
583static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
584{\
585 UINT8 halfH[72];\
7ff037e9 586 UINT8 halfV[64];\
44eb4951 587 UINT8 halfHV[64];\
ba6802de
MN
588 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
589 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
590 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 591 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
592}\
593static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
594{\
595 UINT8 halfH[72];\
7ff037e9 596 UINT8 halfV[64];\
44eb4951 597 UINT8 halfHV[64];\
ba6802de
MN
598 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
599 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
600 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
7ff037e9 601 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
602}\
603static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
604{\
605 UINT8 halfH[72];\
ba6802de
MN
606 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
607 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
44eb4951
MN
608}\
609qpel_mc_func qpel_mc ## name ## _tab[16]={ \
610 qpel_mc00_c ## name, \
611 qpel_mc10_c ## name, \
612 qpel_mc20_c ## name, \
613 qpel_mc30_c ## name, \
614 qpel_mc01_c ## name, \
615 qpel_mc11_c ## name, \
616 qpel_mc21_c ## name, \
617 qpel_mc31_c ## name, \
618 qpel_mc02_c ## name, \
619 qpel_mc12_c ## name, \
620 qpel_mc22_c ## name, \
621 qpel_mc32_c ## name, \
622 qpel_mc03_c ## name, \
623 qpel_mc13_c ## name, \
624 qpel_mc23_c ## name, \
625 qpel_mc33_c ## name, \
626};
627
628QPEL_MC(0, _rnd)
629QPEL_MC(1, _no_rnd)
630
ba6802de 631int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
632{
633 int s, i;
634
635 s = 0;
ba6802de 636 for(i=0;i<16;i++) {
de6d9b64
FB
637 s += abs(pix1[0] - pix2[0]);
638 s += abs(pix1[1] - pix2[1]);
639 s += abs(pix1[2] - pix2[2]);
640 s += abs(pix1[3] - pix2[3]);
641 s += abs(pix1[4] - pix2[4]);
642 s += abs(pix1[5] - pix2[5]);
643 s += abs(pix1[6] - pix2[6]);
644 s += abs(pix1[7] - pix2[7]);
645 s += abs(pix1[8] - pix2[8]);
646 s += abs(pix1[9] - pix2[9]);
647 s += abs(pix1[10] - pix2[10]);
648 s += abs(pix1[11] - pix2[11]);
649 s += abs(pix1[12] - pix2[12]);
650 s += abs(pix1[13] - pix2[13]);
651 s += abs(pix1[14] - pix2[14]);
652 s += abs(pix1[15] - pix2[15]);
653 pix1 += line_size;
654 pix2 += line_size;
655 }
656 return s;
657}
658
ba6802de 659int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
660{
661 int s, i;
662
663 s = 0;
ba6802de 664 for(i=0;i<16;i++) {
de6d9b64
FB
665 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
666 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
667 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
668 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
669 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
670 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
671 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
672 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
673 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
674 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
675 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
676 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
677 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
678 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
679 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
680 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
681 pix1 += line_size;
682 pix2 += line_size;
683 }
684 return s;
685}
686
ba6802de 687int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
688{
689 int s, i;
690 UINT8 *pix3 = pix2 + line_size;
691
692 s = 0;
ba6802de 693 for(i=0;i<16;i++) {
de6d9b64
FB
694 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
695 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
696 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
697 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
698 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
699 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
700 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
701 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
702 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
703 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
704 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
705 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
706 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
707 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
708 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
709 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
710 pix1 += line_size;
711 pix2 += line_size;
712 pix3 += line_size;
713 }
714 return s;
715}
716
ba6802de 717int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
de6d9b64
FB
718{
719 int s, i;
720 UINT8 *pix3 = pix2 + line_size;
721
722 s = 0;
ba6802de 723 for(i=0;i<16;i++) {
de6d9b64
FB
724 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
725 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
726 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
727 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
728 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
729 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
730 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
731 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
732 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
733 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
734 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
735 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
736 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
737 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
738 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
739 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
740 pix1 += line_size;
741 pix2 += line_size;
742 pix3 += line_size;
743 }
744 return s;
745}
746
ba6802de
MN
747int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
748{
749 int s, i;
750
751 s = 0;
752 for(i=0;i<8;i++) {
753 s += abs(pix1[0] - pix2[0]);
754 s += abs(pix1[1] - pix2[1]);
755 s += abs(pix1[2] - pix2[2]);
756 s += abs(pix1[3] - pix2[3]);
757 s += abs(pix1[4] - pix2[4]);
758 s += abs(pix1[5] - pix2[5]);
759 s += abs(pix1[6] - pix2[6]);
760 s += abs(pix1[7] - pix2[7]);
761 pix1 += line_size;
762 pix2 += line_size;
763 }
764 return s;
765}
766
767int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
768{
769 int s, i;
770
771 s = 0;
772 for(i=0;i<8;i++) {
773 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
774 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
775 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
776 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
777 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
778 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
779 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
780 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
781 pix1 += line_size;
782 pix2 += line_size;
783 }
784 return s;
785}
786
787int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
788{
789 int s, i;
790 UINT8 *pix3 = pix2 + line_size;
791
792 s = 0;
793 for(i=0;i<8;i++) {
794 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
795 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
796 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
797 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
798 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
799 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
800 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
801 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
802 pix1 += line_size;
803 pix2 += line_size;
804 pix3 += line_size;
805 }
806 return s;
807}
808
809int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
810{
811 int s, i;
812 UINT8 *pix3 = pix2 + line_size;
813
814 s = 0;
815 for(i=0;i<8;i++) {
816 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
817 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
818 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
819 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
820 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
821 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
822 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
823 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
824 pix1 += line_size;
825 pix2 += line_size;
826 pix3 += line_size;
827 }
828 return s;
829}
830
e0eac44e
FB
831/* permute block according so that it corresponds to the MMX idct
832 order */
d962f6fd 833#ifdef SIMPLE_IDCT
5a240838 834 /* general permutation, but perhaps slightly slower */
d962f6fd
A
835void block_permute(INT16 *block)
836{
837 int i;
838 INT16 temp[64];
839
d962f6fd
A
840 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
841
842 for(i=0; i<64; i++) block[i] = temp[i];
d962f6fd 843}
d962f6fd
A
844#else
845
e0eac44e 846void block_permute(INT16 *block)
de6d9b64 847{
e0eac44e 848 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
de6d9b64
FB
849 int i;
850
e0eac44e
FB
851 for(i=0;i<8;i++) {
852 tmp1 = block[1];
853 tmp2 = block[2];
854 tmp3 = block[3];
855 tmp4 = block[4];
856 tmp5 = block[5];
857 tmp6 = block[6];
858 block[1] = tmp2;
859 block[2] = tmp4;
860 block[3] = tmp6;
861 block[4] = tmp1;
862 block[5] = tmp3;
863 block[6] = tmp5;
864 block += 8;
865 }
866}
d962f6fd 867#endif
e0eac44e
FB
868
869void dsputil_init(void)
870{
871 int i, j;
c34270f5 872 int use_permuted_idct;
e0eac44e 873
de6d9b64
FB
874 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
875 for(i=0;i<MAX_NEG_CROP;i++) {
876 cropTbl[i] = 0;
877 cropTbl[i + MAX_NEG_CROP + 256] = 255;
878 }
879
880 for(i=0;i<512;i++) {
881 squareTbl[i] = (i - 256) * (i - 256);
882 }
883
d962f6fd
A
884#ifdef SIMPLE_IDCT
885 ff_idct = simple_idct;
886#else
4af7bcc1 887 ff_idct = j_rev_dct;
d962f6fd 888#endif
de6d9b64
FB
889 get_pixels = get_pixels_c;
890 put_pixels_clamped = put_pixels_clamped_c;
891 add_pixels_clamped = add_pixels_clamped_c;
44eb4951 892 gmc1= gmc1_c;
de6d9b64 893
ba6802de
MN
894 pix_abs16x16 = pix_abs16x16_c;
895 pix_abs16x16_x2 = pix_abs16x16_x2_c;
896 pix_abs16x16_y2 = pix_abs16x16_y2_c;
de6d9b64 897 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
ba6802de
MN
898 pix_abs8x8 = pix_abs8x8_c;
899 pix_abs8x8_x2 = pix_abs8x8_x2_c;
900 pix_abs8x8_y2 = pix_abs8x8_y2_c;
901 pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
de6d9b64
FB
902 av_fdct = jpeg_fdct_ifast;
903
c34270f5 904 use_permuted_idct = 1;
e0eac44e 905
980fc7b8 906#ifdef HAVE_MMX
de6d9b64
FB
907 dsputil_init_mmx();
908#endif
3d03c0a2
FB
909#ifdef ARCH_ARMV4L
910 dsputil_init_armv4l();
911#endif
c34270f5
FB
912#ifdef HAVE_MLIB
913 dsputil_init_mlib();
914 use_permuted_idct = 0;
915#endif
1e98dffb
NK
916#ifdef ARCH_ALPHA
917 dsputil_init_alpha();
918 use_permuted_idct = 0;
919#endif
c34270f5 920
d962f6fd
A
921#ifdef SIMPLE_IDCT
922 if(ff_idct == simple_idct) use_permuted_idct=0;
923#endif
924
5a240838
MN
925 if(use_permuted_idct)
926#ifdef SIMPLE_IDCT
927 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
928#else
929 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
930#endif
931 else
932 for(i=0; i<64; i++) permutation[i]=i;
933
2f349de2
MN
934 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
935 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
936
c34270f5
FB
937 if (use_permuted_idct) {
938 /* permute for IDCT */
939 for(i=0;i<64;i++) {
940 j = zigzag_direct[i];
941 zigzag_direct[i] = block_permute_op(j);
942 j = ff_alternate_horizontal_scan[i];
943 ff_alternate_horizontal_scan[i] = block_permute_op(j);
944 j = ff_alternate_vertical_scan[i];
945 ff_alternate_vertical_scan[i] = block_permute_op(j);
946 }
947 block_permute(default_intra_matrix);
948 block_permute(default_non_intra_matrix);
949 }
badaf88e
MN
950
951 build_zigzag_end();
de6d9b64 952}
43f1708f
J
953
954void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
955 int orig_linesize[3], int coded_linesize,
956 AVCodecContext *avctx)
957{
958 int quad, diff, x, y;
959 UINT8 *orig, *coded;
960 UINT32 *sq = squareTbl + 256;
961
962 quad = 0;
963 diff = 0;
964
965 /* Luminance */
966 orig = orig_image[0];
967 coded = coded_image[0];
968
969 for (y=0;y<avctx->height;y++) {
970 for (x=0;x<avctx->width;x++) {
971 diff = *(orig + x) - *(coded + x);
972 quad += sq[diff];
973 }
974 orig += orig_linesize[0];
975 coded += coded_linesize;
976 }
977
978 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
979
980 if (avctx->psnr_y) {
981 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
982 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
983 } else
984 avctx->psnr_y = 99.99;
985}
986