10l
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
3 * Copyright (c) 2000, 2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
7ff037e9
MN
18 *
19 * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
de6d9b64
FB
20 */
21#include <stdlib.h>
22#include <stdio.h>
43f1708f 23#include <math.h>
de6d9b64
FB
24#include "avcodec.h"
25#include "dsputil.h"
d962f6fd 26#include "simple_idct.h"
de6d9b64 27
4af7bcc1 28void (*ff_idct)(DCTELEM *block);
de6d9b64
FB
29void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
30void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
31void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
44eb4951 32void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
de6d9b64
FB
33
34op_pixels_abs_func pix_abs16x16;
35op_pixels_abs_func pix_abs16x16_x2;
36op_pixels_abs_func pix_abs16x16_y2;
37op_pixels_abs_func pix_abs16x16_xy2;
38
0cfa9713 39UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
40UINT32 squareTbl[512];
41
e0eac44e
FB
42extern UINT16 default_intra_matrix[64];
43extern UINT16 default_non_intra_matrix[64];
44
45UINT8 zigzag_direct[64] = {
46 0, 1, 8, 16, 9, 2, 3, 10,
47 17, 24, 32, 25, 18, 11, 4, 5,
48 12, 19, 26, 33, 40, 48, 41, 34,
49 27, 20, 13, 6, 7, 14, 21, 28,
50 35, 42, 49, 56, 57, 50, 43, 36,
51 29, 22, 15, 23, 30, 37, 44, 51,
52 58, 59, 52, 45, 38, 31, 39, 46,
53 53, 60, 61, 54, 47, 55, 62, 63
54};
55
2f349de2
MN
/* inverse of the non-permuted zigzag_direct, plus 1, for the MMX quantizer */
57UINT16 __align8 inv_zigzag_direct16[64];
58
/* non-permuted copy of zigzag_direct for the MMX quantizer */
60UINT8 zigzag_direct_noperm[64];
61
e0eac44e
FB
62UINT8 ff_alternate_horizontal_scan[64] = {
63 0, 1, 2, 3, 8, 9, 16, 17,
64 10, 11, 4, 5, 6, 7, 15, 14,
65 13, 12, 19, 18, 24, 25, 32, 33,
66 26, 27, 20, 21, 22, 23, 28, 29,
67 30, 31, 34, 35, 40, 41, 48, 49,
68 42, 43, 36, 37, 38, 39, 44, 45,
69 46, 47, 50, 51, 56, 57, 58, 59,
70 52, 53, 54, 55, 60, 61, 62, 63,
71};
72
73UINT8 ff_alternate_vertical_scan[64] = {
74 0, 8, 16, 24, 1, 9, 2, 10,
75 17, 25, 32, 40, 48, 56, 57, 49,
76 41, 33, 26, 18, 3, 11, 4, 12,
77 19, 27, 34, 42, 50, 58, 35, 43,
78 51, 59, 20, 28, 5, 13, 6, 14,
79 21, 29, 36, 44, 52, 60, 37, 45,
80 53, 61, 22, 30, 7, 15, 23, 31,
81 38, 46, 54, 62, 39, 47, 55, 63,
82};
83
0a8d8945 84/* Input permutation for the simple_idct_mmx */
5a240838 85static UINT8 simple_mmx_permutation[64]={
0a8d8945
MN
86 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
87 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
88 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
89 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
90 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
91 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
92 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
93 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
5a240838
MN
94};
95
2f349de2
MN
96/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
97UINT32 inverse[256]={
98 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
99 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
100 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
101 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
102 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
103 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
104 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
105 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
106 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
107 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
108 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
109 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
110 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
111 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
112 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
113 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
114 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
115 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
116 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
117 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
118 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
119 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
120 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
121 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
122 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
123 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
124 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
125 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
126 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
127 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
128 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
129 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
130};
131
badaf88e
MN
132/* used to skip zeros at the end */
133UINT8 zigzag_end[64];
134
5a240838
MN
135UINT8 permutation[64];
136//UINT8 invPermutation[64];
137
badaf88e
MN
138static void build_zigzag_end()
139{
140 int lastIndex;
141 int lastIndexAfterPerm=0;
142 for(lastIndex=0; lastIndex<64; lastIndex++)
143 {
144 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
145 lastIndexAfterPerm= zigzag_direct[lastIndex];
146 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
147 }
148}
149
de6d9b64
FB
150void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
151{
152 DCTELEM *p;
153 const UINT8 *pix;
154 int i;
155
156 /* read the pixels */
157 p = block;
158 pix = pixels;
159 for(i=0;i<8;i++) {
160 p[0] = pix[0];
161 p[1] = pix[1];
162 p[2] = pix[2];
163 p[3] = pix[3];
164 p[4] = pix[4];
165 p[5] = pix[5];
166 p[6] = pix[6];
167 p[7] = pix[7];
168 pix += line_size;
169 p += 8;
170 }
171}
172
173void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
174{
175 const DCTELEM *p;
176 UINT8 *pix;
177 int i;
178 UINT8 *cm = cropTbl + MAX_NEG_CROP;
179
180 /* read the pixels */
181 p = block;
182 pix = pixels;
183 for(i=0;i<8;i++) {
184 pix[0] = cm[p[0]];
185 pix[1] = cm[p[1]];
186 pix[2] = cm[p[2]];
187 pix[3] = cm[p[3]];
188 pix[4] = cm[p[4]];
189 pix[5] = cm[p[5]];
190 pix[6] = cm[p[6]];
191 pix[7] = cm[p[7]];
192 pix += line_size;
193 p += 8;
194 }
195}
196
197void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
198{
199 const DCTELEM *p;
200 UINT8 *pix;
201 int i;
202 UINT8 *cm = cropTbl + MAX_NEG_CROP;
203
204 /* read the pixels */
205 p = block;
206 pix = pixels;
207 for(i=0;i<8;i++) {
208 pix[0] = cm[pix[0] + p[0]];
209 pix[1] = cm[pix[1] + p[1]];
210 pix[2] = cm[pix[2] + p[2]];
211 pix[3] = cm[pix[3] + p[3]];
212 pix[4] = cm[pix[4] + p[4]];
213 pix[5] = cm[pix[5] + p[5]];
214 pix[6] = cm[pix[6] + p[6]];
215 pix[7] = cm[pix[7] + p[7]];
216 pix += line_size;
217 p += 8;
218 }
219}
220
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions and the
 * table OPNAME_pixels_tab[4] that points to them, in this order:
 *
 *   OPNAME_pixels      OP(dst, src)
 *   OPNAME_pixels_x2   OP(dst, avg2(src, right neighbour))
 *   OPNAME_pixels_y2   OP(dst, avg2(src, pixel one line below))
 *   OPNAME_pixels_xy2  OP(dst, avg4 of src, right, below and below-right)
 *
 * Every function handles h rows of 8 elements.  The source advances by
 * line_size per row and the destination by INCR (INCR may name the
 * line_size parameter).  avg2()/avg4() are whichever definitions are in
 * effect at the point where PIXOP is expanded.  Rows are processed by a
 * do/while loop, so the body always runs at least once.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                        \
                                                                              \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    int col;                                                                  \
                                                                              \
    do {                                                                      \
        for (col = 0; col < 8; col++)                                         \
            OP(block[col], pixels[col]);                                      \
        pixels += line_size;                                                  \
        block  += INCR;                                                       \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    int col;                                                                  \
                                                                              \
    do {                                                                      \
        for (col = 0; col < 8; col++)                                         \
            OP(block[col], avg2(pixels[col], pixels[col + 1]));               \
        pixels += line_size;                                                  \
        block  += INCR;                                                       \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    int col;                                                                  \
                                                                              \
    do {                                                                      \
        for (col = 0; col < 8; col++)                                         \
            OP(block[col], avg2(pixels[col], pixels[line_size + col]));       \
        pixels += line_size;                                                  \
        block  += INCR;                                                       \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    int col;                                                                  \
                                                                              \
    do {                                                                      \
        for (col = 0; col < 8; col++)                                         \
            OP(block[col], avg4(pixels[col], pixels[col + 1],                 \
                                pixels[line_size + col], pixels[line_size + col + 1])); \
        pixels += line_size;                                                  \
        block  += INCR;                                                       \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                        \
    OPNAME ## _pixels_x2,                                                     \
    OPNAME ## _pixels_y2,                                                     \
    OPNAME ## _pixels_xy2,                                                    \
};
319
320
321/* rounding primitives */
322#define avg2(a,b) ((a+b+1)>>1)
323#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
324
325#define op_put(a, b) a = b
326#define op_avg(a, b) a = avg2(a, b)
327#define op_sub(a, b) a -= b
328
329PIXOP(UINT8, put, op_put, line_size)
330PIXOP(UINT8, avg, op_avg, line_size)
331
332PIXOP(DCTELEM, sub, op_sub, 8)
333
334/* not rounding primitives */
335#undef avg2
336#undef avg4
337#define avg2(a,b) ((a+b)>>1)
338#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
339
340PIXOP(UINT8, put_no_rnd, op_put, line_size)
341PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
342
343/* motion estimation */
344
345#undef avg2
346#undef avg4
347#define avg2(a,b) ((a+b+1)>>1)
348#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
349
44eb4951
MN
350static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
351{
352 const int A=(16-x16)*(16-y16);
353 const int B=( x16)*(16-y16);
354 const int C=(16-x16)*( y16);
355 const int D=( x16)*( y16);
356 int i;
357 rounder= 128 - rounder;
358
359 for(i=0; i<h; i++)
360 {
361 dst[0]= (A*src[0] + B*src[1] + C*src[srcStride+0] + D*src[srcStride+1] + rounder)>>8;
362 dst[1]= (A*src[1] + B*src[2] + C*src[srcStride+1] + D*src[srcStride+2] + rounder)>>8;
363 dst[2]= (A*src[2] + B*src[3] + C*src[srcStride+2] + D*src[srcStride+3] + rounder)>>8;
364 dst[3]= (A*src[3] + B*src[4] + C*src[srcStride+3] + D*src[srcStride+4] + rounder)>>8;
365 dst[4]= (A*src[4] + B*src[5] + C*src[srcStride+4] + D*src[srcStride+5] + rounder)>>8;
366 dst[5]= (A*src[5] + B*src[6] + C*src[srcStride+5] + D*src[srcStride+6] + rounder)>>8;
367 dst[6]= (A*src[6] + B*src[7] + C*src[srcStride+6] + D*src[srcStride+7] + rounder)>>8;
368 dst[7]= (A*src[7] + B*src[8] + C*src[srcStride+7] + D*src[srcStride+8] + rounder)>>8;
369 dst+= srcStride;
370 src+= srcStride;
371 }
372}
373
374static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
375{
376 UINT8 *cm = cropTbl + MAX_NEG_CROP;
377 int i;
378 for(i=0; i<h; i++)
379 {
380 dst[0]= cm[(((src[0]+src[1])*160 - (src[0]+src[2])*48 + (src[1]+src[3])*24 - (src[2]+src[4])*8 + r)>>8)];
381 dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)];
382 dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)];
383 dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)];
384 dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)];
385 dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)];
386 dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)];
387 dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)];
388 dst+=dstStride;
389 src+=srcStride;
390 }
391}
392
393static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
394{
395 UINT8 *cm = cropTbl + MAX_NEG_CROP;
396 int i;
397 for(i=0; i<w; i++)
398 {
399 const int src0= src[0*srcStride];
400 const int src1= src[1*srcStride];
401 const int src2= src[2*srcStride];
402 const int src3= src[3*srcStride];
403 const int src4= src[4*srcStride];
404 const int src5= src[5*srcStride];
405 const int src6= src[6*srcStride];
406 const int src7= src[7*srcStride];
407 const int src8= src[8*srcStride];
408 dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)];
409 dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)];
410 dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)];
411 dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)];
412 dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)];
413 dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)];
414 dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)];
415 dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)];
416 dst++;
417 src++;
418 }
419}
420
421static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
422{
423 int i;
424 for(i=0; i<8; i++)
425 {
426 dst[0]= src[0];
427 dst[1]= src[1];
428 dst[2]= src[2];
429 dst[3]= src[3];
430 dst[4]= src[4];
431 dst[5]= src[5];
432 dst[6]= src[6];
433 dst[7]= src[7];
434 dst+=dstStride;
435 src+=srcStride;
436 }
437}
438
439static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
440{
441 int i;
442 for(i=0; i<8; i++)
443 {
444 dst[0]= (src1[0] + src2[0] + r)>>1;
445 dst[1]= (src1[1] + src2[1] + r)>>1;
446 dst[2]= (src1[2] + src2[2] + r)>>1;
447 dst[3]= (src1[3] + src2[3] + r)>>1;
448 dst[4]= (src1[4] + src2[4] + r)>>1;
449 dst[5]= (src1[5] + src2[5] + r)>>1;
450 dst[6]= (src1[6] + src2[6] + r)>>1;
451 dst[7]= (src1[7] + src2[7] + r)>>1;
452 dst+=dstStride;
453 src1+=srcStride;
454 src2+=8;
455 }
456}
457
458static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
459{
460 int i;
461 for(i=0; i<8; i++)
462 {
463 dst[0]= (src1[0] + src2[0] + src3[0] + src4[0] + r)>>2;
464 dst[1]= (src1[1] + src2[1] + src3[1] + src4[1] + r)>>2;
465 dst[2]= (src1[2] + src2[2] + src3[2] + src4[2] + r)>>2;
466 dst[3]= (src1[3] + src2[3] + src3[3] + src4[3] + r)>>2;
467 dst[4]= (src1[4] + src2[4] + src3[4] + src4[4] + r)>>2;
468 dst[5]= (src1[5] + src2[5] + src3[5] + src4[5] + r)>>2;
469 dst[6]= (src1[6] + src2[6] + src3[6] + src4[6] + r)>>2;
470 dst[7]= (src1[7] + src2[7] + src3[7] + src4[7] + r)>>2;
471 dst+=dstStride;
472 src1+=srcStride;
473 src2+=8;
7ff037e9 474 src3+=8;
44eb4951
MN
475 src4+=8;
476 }
477}
478
479#define QPEL_MC(r, name) \
480static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
481{\
482 put_block(dst, src, dstStride, srcStride);\
483}\
484\
485static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
486{\
487 UINT8 half[64];\
488 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
489 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
490}\
491\
492static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
493{\
494 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
495}\
496\
497static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
498{\
499 UINT8 half[64];\
500 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
501 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
502}\
503\
504static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
505{\
506 UINT8 half[64];\
507 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
508 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
509}\
510\
511static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
512{\
513 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
514}\
515\
516static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
517{\
518 UINT8 half[64];\
519 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
520 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
521}\
522static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
523{\
524 UINT8 halfH[72];\
7ff037e9 525 UINT8 halfV[64];\
44eb4951
MN
526 UINT8 halfHV[64];\
527 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
7ff037e9 528 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\
44eb4951
MN
529 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
530 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
531}\
532static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
533{\
534 UINT8 halfH[72];\
7ff037e9 535 UINT8 halfV[64];\
44eb4951
MN
536 UINT8 halfHV[64];\
537 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
7ff037e9 538 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\
44eb4951
MN
539 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
540 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
541}\
542static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
543{\
544 UINT8 halfH[72];\
7ff037e9 545 UINT8 halfV[64];\
44eb4951
MN
546 UINT8 halfHV[64];\
547 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
7ff037e9 548 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\
44eb4951 549 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
7ff037e9 550 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
551}\
552static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
553{\
554 UINT8 halfH[72];\
7ff037e9 555 UINT8 halfV[64];\
44eb4951
MN
556 UINT8 halfHV[64];\
557 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
7ff037e9 558 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\
44eb4951 559 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
7ff037e9 560 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
44eb4951
MN
561}\
562static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
563{\
564 UINT8 halfH[72];\
565 UINT8 halfHV[64];\
566 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
567 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
568 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
569}\
570static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
571{\
572 UINT8 halfH[72];\
573 UINT8 halfHV[64];\
574 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
575 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
576 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
577}\
578static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
579{\
580 UINT8 halfH[72];\
7ff037e9 581 UINT8 halfV[64];\
44eb4951
MN
582 UINT8 halfHV[64];\
583 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
7ff037e9 584 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\
44eb4951 585 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
7ff037e9 586 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
587}\
588static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
589{\
590 UINT8 halfH[72];\
7ff037e9 591 UINT8 halfV[64];\
44eb4951
MN
592 UINT8 halfHV[64];\
593 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
7ff037e9 594 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\
44eb4951 595 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
7ff037e9 596 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
44eb4951
MN
597}\
598static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
599{\
600 UINT8 halfH[72];\
601 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
602 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\
603}\
604qpel_mc_func qpel_mc ## name ## _tab[16]={ \
605 qpel_mc00_c ## name, \
606 qpel_mc10_c ## name, \
607 qpel_mc20_c ## name, \
608 qpel_mc30_c ## name, \
609 qpel_mc01_c ## name, \
610 qpel_mc11_c ## name, \
611 qpel_mc21_c ## name, \
612 qpel_mc31_c ## name, \
613 qpel_mc02_c ## name, \
614 qpel_mc12_c ## name, \
615 qpel_mc22_c ## name, \
616 qpel_mc32_c ## name, \
617 qpel_mc03_c ## name, \
618 qpel_mc13_c ## name, \
619 qpel_mc23_c ## name, \
620 qpel_mc33_c ## name, \
621};
622
623QPEL_MC(0, _rnd)
624QPEL_MC(1, _no_rnd)
625
de6d9b64
FB
626int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
627{
628 int s, i;
629
630 s = 0;
631 for(i=0;i<h;i++) {
632 s += abs(pix1[0] - pix2[0]);
633 s += abs(pix1[1] - pix2[1]);
634 s += abs(pix1[2] - pix2[2]);
635 s += abs(pix1[3] - pix2[3]);
636 s += abs(pix1[4] - pix2[4]);
637 s += abs(pix1[5] - pix2[5]);
638 s += abs(pix1[6] - pix2[6]);
639 s += abs(pix1[7] - pix2[7]);
640 s += abs(pix1[8] - pix2[8]);
641 s += abs(pix1[9] - pix2[9]);
642 s += abs(pix1[10] - pix2[10]);
643 s += abs(pix1[11] - pix2[11]);
644 s += abs(pix1[12] - pix2[12]);
645 s += abs(pix1[13] - pix2[13]);
646 s += abs(pix1[14] - pix2[14]);
647 s += abs(pix1[15] - pix2[15]);
648 pix1 += line_size;
649 pix2 += line_size;
650 }
651 return s;
652}
653
654int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
655{
656 int s, i;
657
658 s = 0;
659 for(i=0;i<h;i++) {
660 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
661 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
662 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
663 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
664 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
665 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
666 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
667 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
668 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
669 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
670 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
671 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
672 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
673 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
674 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
675 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
676 pix1 += line_size;
677 pix2 += line_size;
678 }
679 return s;
680}
681
682int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
683{
684 int s, i;
685 UINT8 *pix3 = pix2 + line_size;
686
687 s = 0;
688 for(i=0;i<h;i++) {
689 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
690 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
691 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
692 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
693 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
694 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
695 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
696 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
697 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
698 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
699 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
700 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
701 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
702 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
703 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
704 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
705 pix1 += line_size;
706 pix2 += line_size;
707 pix3 += line_size;
708 }
709 return s;
710}
711
712int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
713{
714 int s, i;
715 UINT8 *pix3 = pix2 + line_size;
716
717 s = 0;
718 for(i=0;i<h;i++) {
719 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
720 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
721 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
722 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
723 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
724 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
725 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
726 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
727 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
728 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
729 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
730 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
731 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
732 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
733 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
734 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
735 pix1 += line_size;
736 pix2 += line_size;
737 pix3 += line_size;
738 }
739 return s;
740}
741
e0eac44e
FB
742/* permute block according so that it corresponds to the MMX idct
743 order */
d962f6fd 744#ifdef SIMPLE_IDCT
5a240838 745 /* general permutation, but perhaps slightly slower */
d962f6fd
A
746void block_permute(INT16 *block)
747{
748 int i;
749 INT16 temp[64];
750
d962f6fd
A
751 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
752
753 for(i=0; i<64; i++) block[i] = temp[i];
d962f6fd 754}
d962f6fd
A
755#else
756
e0eac44e 757void block_permute(INT16 *block)
de6d9b64 758{
e0eac44e 759 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
de6d9b64
FB
760 int i;
761
e0eac44e
FB
762 for(i=0;i<8;i++) {
763 tmp1 = block[1];
764 tmp2 = block[2];
765 tmp3 = block[3];
766 tmp4 = block[4];
767 tmp5 = block[5];
768 tmp6 = block[6];
769 block[1] = tmp2;
770 block[2] = tmp4;
771 block[3] = tmp6;
772 block[4] = tmp1;
773 block[5] = tmp3;
774 block[6] = tmp5;
775 block += 8;
776 }
777}
d962f6fd 778#endif
e0eac44e
FB
779
780void dsputil_init(void)
781{
782 int i, j;
c34270f5 783 int use_permuted_idct;
e0eac44e 784
de6d9b64
FB
785 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
786 for(i=0;i<MAX_NEG_CROP;i++) {
787 cropTbl[i] = 0;
788 cropTbl[i + MAX_NEG_CROP + 256] = 255;
789 }
790
791 for(i=0;i<512;i++) {
792 squareTbl[i] = (i - 256) * (i - 256);
793 }
794
d962f6fd
A
795#ifdef SIMPLE_IDCT
796 ff_idct = simple_idct;
797#else
4af7bcc1 798 ff_idct = j_rev_dct;
d962f6fd 799#endif
de6d9b64
FB
800 get_pixels = get_pixels_c;
801 put_pixels_clamped = put_pixels_clamped_c;
802 add_pixels_clamped = add_pixels_clamped_c;
44eb4951 803 gmc1= gmc1_c;
de6d9b64
FB
804
805 pix_abs16x16 = pix_abs16x16_c;
806 pix_abs16x16_x2 = pix_abs16x16_x2_c;
807 pix_abs16x16_y2 = pix_abs16x16_y2_c;
808 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
809 av_fdct = jpeg_fdct_ifast;
810
c34270f5 811 use_permuted_idct = 1;
e0eac44e 812
980fc7b8 813#ifdef HAVE_MMX
de6d9b64
FB
814 dsputil_init_mmx();
815#endif
3d03c0a2
FB
816#ifdef ARCH_ARMV4L
817 dsputil_init_armv4l();
818#endif
c34270f5
FB
819#ifdef HAVE_MLIB
820 dsputil_init_mlib();
821 use_permuted_idct = 0;
822#endif
1e98dffb
NK
823#ifdef ARCH_ALPHA
824 dsputil_init_alpha();
825 use_permuted_idct = 0;
826#endif
c34270f5 827
d962f6fd
A
828#ifdef SIMPLE_IDCT
829 if(ff_idct == simple_idct) use_permuted_idct=0;
830#endif
831
5a240838
MN
832 if(use_permuted_idct)
833#ifdef SIMPLE_IDCT
834 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
835#else
836 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
837#endif
838 else
839 for(i=0; i<64; i++) permutation[i]=i;
840
2f349de2
MN
841 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
842 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
843
c34270f5
FB
844 if (use_permuted_idct) {
845 /* permute for IDCT */
846 for(i=0;i<64;i++) {
847 j = zigzag_direct[i];
848 zigzag_direct[i] = block_permute_op(j);
849 j = ff_alternate_horizontal_scan[i];
850 ff_alternate_horizontal_scan[i] = block_permute_op(j);
851 j = ff_alternate_vertical_scan[i];
852 ff_alternate_vertical_scan[i] = block_permute_op(j);
853 }
854 block_permute(default_intra_matrix);
855 block_permute(default_non_intra_matrix);
856 }
badaf88e
MN
857
858 build_zigzag_end();
de6d9b64 859}
43f1708f
J
860
861void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
862 int orig_linesize[3], int coded_linesize,
863 AVCodecContext *avctx)
864{
865 int quad, diff, x, y;
866 UINT8 *orig, *coded;
867 UINT32 *sq = squareTbl + 256;
868
869 quad = 0;
870 diff = 0;
871
872 /* Luminance */
873 orig = orig_image[0];
874 coded = coded_image[0];
875
876 for (y=0;y<avctx->height;y++) {
877 for (x=0;x<avctx->width;x++) {
878 diff = *(orig + x) - *(coded + x);
879 quad += sq[diff];
880 }
881 orig += orig_linesize[0];
882 coded += coded_linesize;
883 }
884
885 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
886
887 if (avctx->psnr_y) {
888 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
889 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
890 } else
891 avctx->psnr_y = 99.99;
892}
893