divx5-gmc support
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
3 * Copyright (c) 2000, 2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19#include <stdlib.h>
20#include <stdio.h>
43f1708f 21#include <math.h>
de6d9b64
FB
22#include "avcodec.h"
23#include "dsputil.h"
d962f6fd 24#include "simple_idct.h"
de6d9b64 25
4af7bcc1 26void (*ff_idct)(DCTELEM *block);
de6d9b64
FB
27void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
28void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
29void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
44eb4951 30void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
de6d9b64
FB
31
32op_pixels_abs_func pix_abs16x16;
33op_pixels_abs_func pix_abs16x16_x2;
34op_pixels_abs_func pix_abs16x16_y2;
35op_pixels_abs_func pix_abs16x16_xy2;
36
0cfa9713 37UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
de6d9b64
FB
38UINT32 squareTbl[512];
39
e0eac44e
FB
40extern UINT16 default_intra_matrix[64];
41extern UINT16 default_non_intra_matrix[64];
42
43UINT8 zigzag_direct[64] = {
44 0, 1, 8, 16, 9, 2, 3, 10,
45 17, 24, 32, 25, 18, 11, 4, 5,
46 12, 19, 26, 33, 40, 48, 41, 34,
47 27, 20, 13, 6, 7, 14, 21, 28,
48 35, 42, 49, 56, 57, 50, 43, 36,
49 29, 22, 15, 23, 30, 37, 44, 51,
50 58, 59, 52, 45, 38, 31, 39, 46,
51 53, 60, 61, 54, 47, 55, 62, 63
52};
53
2f349de2
MN
/* inverse of the unpermuted zigzag_direct, plus 1, for the MMX quantizer */
55UINT16 __align8 inv_zigzag_direct16[64];
56
/* unpermuted copy of zigzag_direct for the MMX quantizer */
58UINT8 zigzag_direct_noperm[64];
59
e0eac44e
FB
60UINT8 ff_alternate_horizontal_scan[64] = {
61 0, 1, 2, 3, 8, 9, 16, 17,
62 10, 11, 4, 5, 6, 7, 15, 14,
63 13, 12, 19, 18, 24, 25, 32, 33,
64 26, 27, 20, 21, 22, 23, 28, 29,
65 30, 31, 34, 35, 40, 41, 48, 49,
66 42, 43, 36, 37, 38, 39, 44, 45,
67 46, 47, 50, 51, 56, 57, 58, 59,
68 52, 53, 54, 55, 60, 61, 62, 63,
69};
70
71UINT8 ff_alternate_vertical_scan[64] = {
72 0, 8, 16, 24, 1, 9, 2, 10,
73 17, 25, 32, 40, 48, 56, 57, 49,
74 41, 33, 26, 18, 3, 11, 4, 12,
75 19, 27, 34, 42, 50, 58, 35, 43,
76 51, 59, 20, 28, 5, 13, 6, 14,
77 21, 29, 36, 44, 52, 60, 37, 45,
78 53, 61, 22, 30, 7, 15, 23, 31,
79 38, 46, 54, 62, 39, 47, 55, 63,
80};
81
0a8d8945 82/* Input permutation for the simple_idct_mmx */
5a240838 83static UINT8 simple_mmx_permutation[64]={
0a8d8945
MN
84 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
85 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
86 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
87 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
88 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
89 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
90 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
91 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
5a240838
MN
92};
93
2f349de2
MN
94/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
95UINT32 inverse[256]={
96 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
97 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
98 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
99 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
100 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
101 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
102 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
103 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
104 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
105 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
106 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
107 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
108 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
109 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
110 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
111 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
112 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
113 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
114 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
115 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
116 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
117 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
118 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
119 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
120 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
121 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
122 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
123 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
124 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
125 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
126 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
127 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
128};
129
badaf88e
MN
130/* used to skip zeros at the end */
131UINT8 zigzag_end[64];
132
5a240838
MN
133UINT8 permutation[64];
134//UINT8 invPermutation[64];
135
badaf88e
MN
136static void build_zigzag_end()
137{
138 int lastIndex;
139 int lastIndexAfterPerm=0;
140 for(lastIndex=0; lastIndex<64; lastIndex++)
141 {
142 if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
143 lastIndexAfterPerm= zigzag_direct[lastIndex];
144 zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
145 }
146}
147
de6d9b64
FB
148void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
149{
150 DCTELEM *p;
151 const UINT8 *pix;
152 int i;
153
154 /* read the pixels */
155 p = block;
156 pix = pixels;
157 for(i=0;i<8;i++) {
158 p[0] = pix[0];
159 p[1] = pix[1];
160 p[2] = pix[2];
161 p[3] = pix[3];
162 p[4] = pix[4];
163 p[5] = pix[5];
164 p[6] = pix[6];
165 p[7] = pix[7];
166 pix += line_size;
167 p += 8;
168 }
169}
170
171void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
172{
173 const DCTELEM *p;
174 UINT8 *pix;
175 int i;
176 UINT8 *cm = cropTbl + MAX_NEG_CROP;
177
178 /* read the pixels */
179 p = block;
180 pix = pixels;
181 for(i=0;i<8;i++) {
182 pix[0] = cm[p[0]];
183 pix[1] = cm[p[1]];
184 pix[2] = cm[p[2]];
185 pix[3] = cm[p[3]];
186 pix[4] = cm[p[4]];
187 pix[5] = cm[p[5]];
188 pix[6] = cm[p[6]];
189 pix[7] = cm[p[7]];
190 pix += line_size;
191 p += 8;
192 }
193}
194
195void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
196{
197 const DCTELEM *p;
198 UINT8 *pix;
199 int i;
200 UINT8 *cm = cropTbl + MAX_NEG_CROP;
201
202 /* read the pixels */
203 p = block;
204 pix = pixels;
205 for(i=0;i<8;i++) {
206 pix[0] = cm[pix[0] + p[0]];
207 pix[1] = cm[pix[1] + p[1]];
208 pix[2] = cm[pix[2] + p[2]];
209 pix[3] = cm[pix[3] + p[3]];
210 pix[4] = cm[pix[4] + p[4]];
211 pix[5] = cm[pix[5] + p[5]];
212 pix[6] = cm[pix[6] + p[6]];
213 pix[7] = cm[pix[7] + p[7]];
214 pix += line_size;
215 p += 8;
216 }
217}
218
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static kernels that each
 * process 8 pixels per row for h rows, plus the table OPNAME_pixels_tab[4]
 * holding them in this order:
 *   [0] OPNAME_pixels      source pixel used as is
 *   [1] OPNAME_pixels_x2   avg2() of a pixel and its right neighbour
 *   [2] OPNAME_pixels_y2   avg2() of a pixel and the one a line below
 *   [3] OPNAME_pixels_xy2  avg4() of the 2x2 neighbourhood
 *
 * BTYPE is the destination element type, OP(dst, value) combines the value
 * into the destination, and INCR is added to the destination pointer after
 * each row (it may name the line_size parameter).  avg2/avg4 are taken from
 * whatever definitions are active where PIXOP is instantiated.
 *
 * The row loops are do/while loops: the body runs before h is tested, so
 * callers must pass h >= 1.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                        \
                                                                              \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    BTYPE *out = block;                                                       \
    const UINT8 *in = pixels;                                                 \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++)                                               \
            OP(out[x], in[x]);                                                \
        in  += line_size;                                                     \
        out += INCR;                                                          \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    BTYPE *out = block;                                                       \
    const UINT8 *in = pixels;                                                 \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++)                                               \
            OP(out[x], avg2(in[x], in[x + 1]));                               \
        in  += line_size;                                                     \
        out += INCR;                                                          \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    BTYPE *out = block;                                                       \
    const UINT8 *in = pixels;                                                 \
    const UINT8 *below = pixels + line_size;                                  \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++)                                               \
            OP(out[x], avg2(in[x], below[x]));                                \
        in    += line_size;                                                   \
        below += line_size;                                                   \
        out   += INCR;                                                        \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{                                                                             \
    BTYPE *out = block;                                                       \
    const UINT8 *in = pixels;                                                 \
    const UINT8 *below = pixels + line_size;                                  \
    int x;                                                                    \
                                                                              \
    do {                                                                      \
        for (x = 0; x < 8; x++)                                               \
            OP(out[x], avg4(in[x], in[x + 1], below[x], below[x + 1]));       \
        in    += line_size;                                                   \
        below += line_size;                                                   \
        out   += INCR;                                                        \
    } while (--h);                                                            \
}                                                                             \
                                                                              \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                        \
    OPNAME ## _pixels_x2,                                                     \
    OPNAME ## _pixels_y2,                                                     \
    OPNAME ## _pixels_xy2,                                                    \
};
317
318
319/* rounding primitives */
320#define avg2(a,b) ((a+b+1)>>1)
321#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
322
323#define op_put(a, b) a = b
324#define op_avg(a, b) a = avg2(a, b)
325#define op_sub(a, b) a -= b
326
327PIXOP(UINT8, put, op_put, line_size)
328PIXOP(UINT8, avg, op_avg, line_size)
329
330PIXOP(DCTELEM, sub, op_sub, 8)
331
332/* not rounding primitives */
333#undef avg2
334#undef avg4
335#define avg2(a,b) ((a+b)>>1)
336#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
337
338PIXOP(UINT8, put_no_rnd, op_put, line_size)
339PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
340
341/* motion estimation */
342
343#undef avg2
344#undef avg4
345#define avg2(a,b) ((a+b+1)>>1)
346#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
347
44eb4951
MN
348static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
349{
350 const int A=(16-x16)*(16-y16);
351 const int B=( x16)*(16-y16);
352 const int C=(16-x16)*( y16);
353 const int D=( x16)*( y16);
354 int i;
355 rounder= 128 - rounder;
356
357 for(i=0; i<h; i++)
358 {
359 dst[0]= (A*src[0] + B*src[1] + C*src[srcStride+0] + D*src[srcStride+1] + rounder)>>8;
360 dst[1]= (A*src[1] + B*src[2] + C*src[srcStride+1] + D*src[srcStride+2] + rounder)>>8;
361 dst[2]= (A*src[2] + B*src[3] + C*src[srcStride+2] + D*src[srcStride+3] + rounder)>>8;
362 dst[3]= (A*src[3] + B*src[4] + C*src[srcStride+3] + D*src[srcStride+4] + rounder)>>8;
363 dst[4]= (A*src[4] + B*src[5] + C*src[srcStride+4] + D*src[srcStride+5] + rounder)>>8;
364 dst[5]= (A*src[5] + B*src[6] + C*src[srcStride+5] + D*src[srcStride+6] + rounder)>>8;
365 dst[6]= (A*src[6] + B*src[7] + C*src[srcStride+6] + D*src[srcStride+7] + rounder)>>8;
366 dst[7]= (A*src[7] + B*src[8] + C*src[srcStride+7] + D*src[srcStride+8] + rounder)>>8;
367 dst+= srcStride;
368 src+= srcStride;
369 }
370}
371
372static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
373{
374 UINT8 *cm = cropTbl + MAX_NEG_CROP;
375 int i;
376 for(i=0; i<h; i++)
377 {
378 dst[0]= cm[(((src[0]+src[1])*160 - (src[0]+src[2])*48 + (src[1]+src[3])*24 - (src[2]+src[4])*8 + r)>>8)];
379 dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)];
380 dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)];
381 dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)];
382 dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)];
383 dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)];
384 dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)];
385 dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)];
386 dst+=dstStride;
387 src+=srcStride;
388 }
389}
390
391static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
392{
393 UINT8 *cm = cropTbl + MAX_NEG_CROP;
394 int i;
395 for(i=0; i<w; i++)
396 {
397 const int src0= src[0*srcStride];
398 const int src1= src[1*srcStride];
399 const int src2= src[2*srcStride];
400 const int src3= src[3*srcStride];
401 const int src4= src[4*srcStride];
402 const int src5= src[5*srcStride];
403 const int src6= src[6*srcStride];
404 const int src7= src[7*srcStride];
405 const int src8= src[8*srcStride];
406 dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)];
407 dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)];
408 dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)];
409 dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)];
410 dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)];
411 dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)];
412 dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)];
413 dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)];
414 dst++;
415 src++;
416 }
417}
418
419static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
420{
421 int i;
422 for(i=0; i<8; i++)
423 {
424 dst[0]= src[0];
425 dst[1]= src[1];
426 dst[2]= src[2];
427 dst[3]= src[3];
428 dst[4]= src[4];
429 dst[5]= src[5];
430 dst[6]= src[6];
431 dst[7]= src[7];
432 dst+=dstStride;
433 src+=srcStride;
434 }
435}
436
437static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
438{
439 int i;
440 for(i=0; i<8; i++)
441 {
442 dst[0]= (src1[0] + src2[0] + r)>>1;
443 dst[1]= (src1[1] + src2[1] + r)>>1;
444 dst[2]= (src1[2] + src2[2] + r)>>1;
445 dst[3]= (src1[3] + src2[3] + r)>>1;
446 dst[4]= (src1[4] + src2[4] + r)>>1;
447 dst[5]= (src1[5] + src2[5] + r)>>1;
448 dst[6]= (src1[6] + src2[6] + r)>>1;
449 dst[7]= (src1[7] + src2[7] + r)>>1;
450 dst+=dstStride;
451 src1+=srcStride;
452 src2+=8;
453 }
454}
455
456static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
457{
458 int i;
459 for(i=0; i<8; i++)
460 {
461 dst[0]= (src1[0] + src2[0] + src3[0] + src4[0] + r)>>2;
462 dst[1]= (src1[1] + src2[1] + src3[1] + src4[1] + r)>>2;
463 dst[2]= (src1[2] + src2[2] + src3[2] + src4[2] + r)>>2;
464 dst[3]= (src1[3] + src2[3] + src3[3] + src4[3] + r)>>2;
465 dst[4]= (src1[4] + src2[4] + src3[4] + src4[4] + r)>>2;
466 dst[5]= (src1[5] + src2[5] + src3[5] + src4[5] + r)>>2;
467 dst[6]= (src1[6] + src2[6] + src3[6] + src4[6] + r)>>2;
468 dst[7]= (src1[7] + src2[7] + src3[7] + src4[7] + r)>>2;
469 dst+=dstStride;
470 src1+=srcStride;
471 src2+=8;
472 src3+=9;
473 src4+=8;
474 }
475}
476
477#define QPEL_MC(r, name) \
478static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
479{\
480 put_block(dst, src, dstStride, srcStride);\
481}\
482\
483static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
484{\
485 UINT8 half[64];\
486 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
487 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
488}\
489\
490static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
491{\
492 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
493}\
494\
495static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
496{\
497 UINT8 half[64];\
498 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
499 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
500}\
501\
502static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
503{\
504 UINT8 half[64];\
505 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
506 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
507}\
508\
509static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
510{\
511 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
512}\
513\
514static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
515{\
516 UINT8 half[64];\
517 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
518 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
519}\
520static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
521{\
522 UINT8 halfH[72];\
523 UINT8 halfV[72];\
524 UINT8 halfHV[64];\
525 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
526 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
527 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
528 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
529}\
530static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
531{\
532 UINT8 halfH[72];\
533 UINT8 halfV[72];\
534 UINT8 halfHV[64];\
535 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
536 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
537 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
538 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
539}\
540static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
541{\
542 UINT8 halfH[72];\
543 UINT8 halfV[72];\
544 UINT8 halfHV[64];\
545 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
546 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
547 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
548 avg4_block(dst, src+srcStride, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
549}\
550static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
551{\
552 UINT8 halfH[72];\
553 UINT8 halfV[72];\
554 UINT8 halfHV[64];\
555 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
556 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
557 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
558 avg4_block(dst, src+srcStride+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
559}\
560static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
561{\
562 UINT8 halfH[72];\
563 UINT8 halfHV[64];\
564 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
565 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
566 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
567}\
568static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
569{\
570 UINT8 halfH[72];\
571 UINT8 halfHV[64];\
572 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
573 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
574 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
575}\
576static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
577{\
578 UINT8 halfH[72];\
579 UINT8 halfV[72];\
580 UINT8 halfHV[64];\
581 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
582 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
583 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
584 avg2_block(dst, halfV, halfHV, dstStride, 9, 1-r);\
585}\
586static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
587{\
588 UINT8 halfH[72];\
589 UINT8 halfV[72];\
590 UINT8 halfHV[64];\
591 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
592 qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
593 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
594 avg2_block(dst, halfV+1, halfHV, dstStride, 9, 1-r);\
595}\
596static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
597{\
598 UINT8 halfH[72];\
599 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
600 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\
601}\
602qpel_mc_func qpel_mc ## name ## _tab[16]={ \
603 qpel_mc00_c ## name, \
604 qpel_mc10_c ## name, \
605 qpel_mc20_c ## name, \
606 qpel_mc30_c ## name, \
607 qpel_mc01_c ## name, \
608 qpel_mc11_c ## name, \
609 qpel_mc21_c ## name, \
610 qpel_mc31_c ## name, \
611 qpel_mc02_c ## name, \
612 qpel_mc12_c ## name, \
613 qpel_mc22_c ## name, \
614 qpel_mc32_c ## name, \
615 qpel_mc03_c ## name, \
616 qpel_mc13_c ## name, \
617 qpel_mc23_c ## name, \
618 qpel_mc33_c ## name, \
619};
620
621QPEL_MC(0, _rnd)
622QPEL_MC(1, _no_rnd)
623
de6d9b64
FB
624int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
625{
626 int s, i;
627
628 s = 0;
629 for(i=0;i<h;i++) {
630 s += abs(pix1[0] - pix2[0]);
631 s += abs(pix1[1] - pix2[1]);
632 s += abs(pix1[2] - pix2[2]);
633 s += abs(pix1[3] - pix2[3]);
634 s += abs(pix1[4] - pix2[4]);
635 s += abs(pix1[5] - pix2[5]);
636 s += abs(pix1[6] - pix2[6]);
637 s += abs(pix1[7] - pix2[7]);
638 s += abs(pix1[8] - pix2[8]);
639 s += abs(pix1[9] - pix2[9]);
640 s += abs(pix1[10] - pix2[10]);
641 s += abs(pix1[11] - pix2[11]);
642 s += abs(pix1[12] - pix2[12]);
643 s += abs(pix1[13] - pix2[13]);
644 s += abs(pix1[14] - pix2[14]);
645 s += abs(pix1[15] - pix2[15]);
646 pix1 += line_size;
647 pix2 += line_size;
648 }
649 return s;
650}
651
652int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
653{
654 int s, i;
655
656 s = 0;
657 for(i=0;i<h;i++) {
658 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
659 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
660 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
661 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
662 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
663 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
664 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
665 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
666 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
667 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
668 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
669 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
670 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
671 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
672 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
673 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
674 pix1 += line_size;
675 pix2 += line_size;
676 }
677 return s;
678}
679
680int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
681{
682 int s, i;
683 UINT8 *pix3 = pix2 + line_size;
684
685 s = 0;
686 for(i=0;i<h;i++) {
687 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
688 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
689 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
690 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
691 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
692 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
693 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
694 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
695 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
696 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
697 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
698 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
699 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
700 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
701 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
702 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
703 pix1 += line_size;
704 pix2 += line_size;
705 pix3 += line_size;
706 }
707 return s;
708}
709
710int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
711{
712 int s, i;
713 UINT8 *pix3 = pix2 + line_size;
714
715 s = 0;
716 for(i=0;i<h;i++) {
717 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
718 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
719 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
720 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
721 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
722 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
723 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
724 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
725 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
726 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
727 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
728 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
729 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
730 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
731 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
732 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
733 pix1 += line_size;
734 pix2 += line_size;
735 pix3 += line_size;
736 }
737 return s;
738}
739
e0eac44e
FB
740/* permute block according so that it corresponds to the MMX idct
741 order */
d962f6fd 742#ifdef SIMPLE_IDCT
5a240838 743 /* general permutation, but perhaps slightly slower */
d962f6fd
A
744void block_permute(INT16 *block)
745{
746 int i;
747 INT16 temp[64];
748
d962f6fd
A
749 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
750
751 for(i=0; i<64; i++) block[i] = temp[i];
d962f6fd 752}
d962f6fd
A
753#else
754
e0eac44e 755void block_permute(INT16 *block)
de6d9b64 756{
e0eac44e 757 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
de6d9b64
FB
758 int i;
759
e0eac44e
FB
760 for(i=0;i<8;i++) {
761 tmp1 = block[1];
762 tmp2 = block[2];
763 tmp3 = block[3];
764 tmp4 = block[4];
765 tmp5 = block[5];
766 tmp6 = block[6];
767 block[1] = tmp2;
768 block[2] = tmp4;
769 block[3] = tmp6;
770 block[4] = tmp1;
771 block[5] = tmp3;
772 block[6] = tmp5;
773 block += 8;
774 }
775}
d962f6fd 776#endif
e0eac44e
FB
777
778void dsputil_init(void)
779{
780 int i, j;
c34270f5 781 int use_permuted_idct;
e0eac44e 782
de6d9b64
FB
783 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
784 for(i=0;i<MAX_NEG_CROP;i++) {
785 cropTbl[i] = 0;
786 cropTbl[i + MAX_NEG_CROP + 256] = 255;
787 }
788
789 for(i=0;i<512;i++) {
790 squareTbl[i] = (i - 256) * (i - 256);
791 }
792
d962f6fd
A
793#ifdef SIMPLE_IDCT
794 ff_idct = simple_idct;
795#else
4af7bcc1 796 ff_idct = j_rev_dct;
d962f6fd 797#endif
de6d9b64
FB
798 get_pixels = get_pixels_c;
799 put_pixels_clamped = put_pixels_clamped_c;
800 add_pixels_clamped = add_pixels_clamped_c;
44eb4951 801 gmc1= gmc1_c;
de6d9b64
FB
802
803 pix_abs16x16 = pix_abs16x16_c;
804 pix_abs16x16_x2 = pix_abs16x16_x2_c;
805 pix_abs16x16_y2 = pix_abs16x16_y2_c;
806 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
807 av_fdct = jpeg_fdct_ifast;
808
c34270f5 809 use_permuted_idct = 1;
e0eac44e 810
980fc7b8 811#ifdef HAVE_MMX
de6d9b64
FB
812 dsputil_init_mmx();
813#endif
3d03c0a2
FB
814#ifdef ARCH_ARMV4L
815 dsputil_init_armv4l();
816#endif
c34270f5
FB
817#ifdef HAVE_MLIB
818 dsputil_init_mlib();
819 use_permuted_idct = 0;
820#endif
1e98dffb
NK
821#ifdef ARCH_ALPHA
822 dsputil_init_alpha();
823 use_permuted_idct = 0;
824#endif
c34270f5 825
d962f6fd
A
826#ifdef SIMPLE_IDCT
827 if(ff_idct == simple_idct) use_permuted_idct=0;
828#endif
829
5a240838
MN
830 if(use_permuted_idct)
831#ifdef SIMPLE_IDCT
832 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
833#else
834 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
835#endif
836 else
837 for(i=0; i<64; i++) permutation[i]=i;
838
2f349de2
MN
839 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
840 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
841
c34270f5
FB
842 if (use_permuted_idct) {
843 /* permute for IDCT */
844 for(i=0;i<64;i++) {
845 j = zigzag_direct[i];
846 zigzag_direct[i] = block_permute_op(j);
847 j = ff_alternate_horizontal_scan[i];
848 ff_alternate_horizontal_scan[i] = block_permute_op(j);
849 j = ff_alternate_vertical_scan[i];
850 ff_alternate_vertical_scan[i] = block_permute_op(j);
851 }
852 block_permute(default_intra_matrix);
853 block_permute(default_non_intra_matrix);
854 }
badaf88e
MN
855
856 build_zigzag_end();
de6d9b64 857}
43f1708f
J
858
859void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
860 int orig_linesize[3], int coded_linesize,
861 AVCodecContext *avctx)
862{
863 int quad, diff, x, y;
864 UINT8 *orig, *coded;
865 UINT32 *sq = squareTbl + 256;
866
867 quad = 0;
868 diff = 0;
869
870 /* Luminance */
871 orig = orig_image[0];
872 coded = coded_image[0];
873
874 for (y=0;y<avctx->height;y++) {
875 for (x=0;x<avctx->width;x++) {
876 diff = *(orig + x) - *(coded + x);
877 quad += sq[diff];
878 }
879 orig += orig_linesize[0];
880 coded += coded_linesize;
881 }
882
883 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
884
885 if (avctx->psnr_y) {
886 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
887 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
888 } else
889 avctx->psnr_y = 99.99;
890}
891