Since some junk may be or may be not present before actual VC-1 extradata,
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
406792e7 3 * Copyright (c) 2000, 2001 Fabrice Bellard
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
de6d9b64 5 *
7b94177e
DB
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7 *
b78e7197
DB
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
ff4ec49e
FB
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
b78e7197 13 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 14 *
b78e7197 15 * FFmpeg is distributed in the hope that it will be useful,
de6d9b64 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
de6d9b64 19 *
ff4ec49e 20 * You should have received a copy of the GNU Lesser General Public
b78e7197 21 * License along with FFmpeg; if not, write to the Free Software
5509bffa 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 23 */
115329f1 24
983e3246 25/**
bad5537e 26 * @file libavcodec/dsputil.c
983e3246
MN
27 * DSP utils
28 */
115329f1 29
de6d9b64
FB
30#include "avcodec.h"
31#include "dsputil.h"
b0368839 32#include "simple_idct.h"
65e4c8c9 33#include "faandct.h"
6f08c541 34#include "faanidct.h"
199436b9 35#include "mathops.h"
eb75a698 36#include "h263.h"
059715a4 37#include "snow.h"
5596c60c 38
88730be6
MR
39/* snow.c */
40void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
41
2dac4acf
LM
42/* vorbis.c */
43void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
44
ac2e5564
LM
45/* ac3dec.c */
46void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
47
6810b93a
LM
48/* flacenc.c */
49void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
50
4a9ca0a2
LM
51/* pngdec.c */
52void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
53
28245435
PR
54/* eaidct.c */
55void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
56
55fde95e 57uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
1d503957 58uint32_t ff_squareTbl[512] = {0, };
de6d9b64 59
917f55cc
LM
60// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
61#define pb_7f (~0UL/255 * 0x7f)
62#define pb_80 (~0UL/255 * 0x80)
469bd7b1 63
0c1a9eda 64const uint8_t ff_zigzag_direct[64] = {
2ad1516a
MN
65 0, 1, 8, 16, 9, 2, 3, 10,
66 17, 24, 32, 25, 18, 11, 4, 5,
e0eac44e 67 12, 19, 26, 33, 40, 48, 41, 34,
2ad1516a 68 27, 20, 13, 6, 7, 14, 21, 28,
e0eac44e
FB
69 35, 42, 49, 56, 57, 50, 43, 36,
70 29, 22, 15, 23, 30, 37, 44, 51,
71 58, 59, 52, 45, 38, 31, 39, 46,
72 53, 60, 61, 54, 47, 55, 62, 63
73};
74
10acc479
RS
75/* Specific zigzag scan for 248 idct. NOTE that unlike the
76 specification, we interleave the fields */
77const uint8_t ff_zigzag248_direct[64] = {
78 0, 8, 1, 9, 16, 24, 2, 10,
79 17, 25, 32, 40, 48, 56, 33, 41,
80 18, 26, 3, 11, 4, 12, 19, 27,
81 34, 42, 49, 57, 50, 58, 35, 43,
82 20, 28, 5, 13, 6, 14, 21, 29,
83 36, 44, 51, 59, 52, 60, 37, 45,
84 22, 30, 7, 15, 23, 31, 38, 46,
85 53, 61, 54, 62, 39, 47, 55, 63,
86};
87
2f349de2 88/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
64e657fd 89DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16[64]);
2f349de2 90
0c1a9eda 91const uint8_t ff_alternate_horizontal_scan[64] = {
115329f1 92 0, 1, 2, 3, 8, 9, 16, 17,
e0eac44e 93 10, 11, 4, 5, 6, 7, 15, 14,
115329f1 94 13, 12, 19, 18, 24, 25, 32, 33,
e0eac44e 95 26, 27, 20, 21, 22, 23, 28, 29,
115329f1 96 30, 31, 34, 35, 40, 41, 48, 49,
e0eac44e 97 42, 43, 36, 37, 38, 39, 44, 45,
115329f1 98 46, 47, 50, 51, 56, 57, 58, 59,
e0eac44e
FB
99 52, 53, 54, 55, 60, 61, 62, 63,
100};
101
0c1a9eda 102const uint8_t ff_alternate_vertical_scan[64] = {
115329f1 103 0, 8, 16, 24, 1, 9, 2, 10,
e0eac44e 104 17, 25, 32, 40, 48, 56, 57, 49,
115329f1 105 41, 33, 26, 18, 3, 11, 4, 12,
e0eac44e 106 19, 27, 34, 42, 50, 58, 35, 43,
115329f1 107 51, 59, 20, 28, 5, 13, 6, 14,
e0eac44e 108 21, 29, 36, 44, 52, 60, 37, 45,
115329f1 109 53, 61, 22, 30, 7, 15, 23, 31,
e0eac44e
FB
110 38, 46, 54, 62, 39, 47, 55, 63,
111};
112
1a918c08
LM
113/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
114 * for a>16909558, is an overestimate by less than 1 part in 1<<24 */
115const uint32_t ff_inverse[257]={
115329f1
DB
116 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
117 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
118 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
119 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
120 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
121 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
122 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
123 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
124 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
125 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
126 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
127 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
128 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
129 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
130 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
131 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
132 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
133 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
134 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
135 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
136 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
137 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
138 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
139 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
140 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
141 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
142 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
143 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
144 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
145 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
146 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
2f349de2 147 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
1a918c08 148 16777216
2f349de2
MN
149};
150
b0368839
MN
151/* Input permutation for the simple_idct_mmx */
152static const uint8_t simple_mmx_permutation[64]={
bb270c08
DB
153 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
154 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
155 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
156 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
157 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
158 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
159 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
160 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
b0368839
MN
161};
162
0e956ba2
AS
163static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
164
4c79b95c
AJ
165void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
166 int i;
167 int end;
168
169 st->scantable= src_scantable;
170
171 for(i=0; i<64; i++){
172 int j;
173 j = src_scantable[i];
174 st->permutated[i] = permutation[j];
b250f9c6 175#if ARCH_PPC
4c79b95c
AJ
176 st->inverse[j] = i;
177#endif
178 }
179
180 end=-1;
181 for(i=0; i<64; i++){
182 int j;
183 j = st->permutated[i];
184 if(j>end) end=j;
185 st->raster_end[i]= end;
186 }
187}
188
/**
 * Sums all samples of a 16x16 block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between the starts of two consecutive rows
 * @return sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
210
0c1a9eda 211static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
212{
213 int s, i, j;
1d503957 214 uint32_t *sq = ff_squareTbl + 256;
3aa102be
MN
215
216 s = 0;
217 for (i = 0; i < 16; i++) {
bb270c08 218 for (j = 0; j < 16; j += 8) {
2a006cd3 219#if 0
bb270c08
DB
220 s += sq[pix[0]];
221 s += sq[pix[1]];
222 s += sq[pix[2]];
223 s += sq[pix[3]];
224 s += sq[pix[4]];
225 s += sq[pix[5]];
226 s += sq[pix[6]];
227 s += sq[pix[7]];
2a006cd3
FL
228#else
229#if LONG_MAX > 2147483647
bb270c08
DB
230 register uint64_t x=*(uint64_t*)pix;
231 s += sq[x&0xff];
232 s += sq[(x>>8)&0xff];
233 s += sq[(x>>16)&0xff];
234 s += sq[(x>>24)&0xff];
2a006cd3
FL
235 s += sq[(x>>32)&0xff];
236 s += sq[(x>>40)&0xff];
237 s += sq[(x>>48)&0xff];
238 s += sq[(x>>56)&0xff];
239#else
bb270c08
DB
240 register uint32_t x=*(uint32_t*)pix;
241 s += sq[x&0xff];
242 s += sq[(x>>8)&0xff];
243 s += sq[(x>>16)&0xff];
244 s += sq[(x>>24)&0xff];
2a006cd3
FL
245 x=*(uint32_t*)(pix+4);
246 s += sq[x&0xff];
247 s += sq[(x>>8)&0xff];
248 s += sq[(x>>16)&0xff];
249 s += sq[(x>>24)&0xff];
250#endif
251#endif
bb270c08
DB
252 pix += 8;
253 }
254 pix += line_size - 16;
3aa102be
MN
255 }
256 return s;
257}
258
/**
 * Applies bswap_32() to each of w 32-bit words, reading from src and
 * writing to dst, in ascending index order.
 *
 * @param dst destination words
 * @param src source words
 * @param w   number of 32-bit words to process; nothing is done for w <= 0
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for(n=0; n<w; n++)
        dst[n]= bswap_32(src[n]);
}
3aa102be 276
26efc54e
MN
277static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
278{
279 int s, i;
1d503957 280 uint32_t *sq = ff_squareTbl + 256;
26efc54e
MN
281
282 s = 0;
283 for (i = 0; i < h; i++) {
284 s += sq[pix1[0] - pix2[0]];
285 s += sq[pix1[1] - pix2[1]];
286 s += sq[pix1[2] - pix2[2]];
287 s += sq[pix1[3] - pix2[3]];
288 pix1 += line_size;
289 pix2 += line_size;
290 }
291 return s;
292}
293
bb198e19 294static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
295{
296 int s, i;
1d503957 297 uint32_t *sq = ff_squareTbl + 256;
1457ab52
MN
298
299 s = 0;
bb198e19 300 for (i = 0; i < h; i++) {
1457ab52
MN
301 s += sq[pix1[0] - pix2[0]];
302 s += sq[pix1[1] - pix2[1]];
303 s += sq[pix1[2] - pix2[2]];
304 s += sq[pix1[3] - pix2[3]];
305 s += sq[pix1[4] - pix2[4]];
306 s += sq[pix1[5] - pix2[5]];
307 s += sq[pix1[6] - pix2[6]];
308 s += sq[pix1[7] - pix2[7]];
309 pix1 += line_size;
310 pix2 += line_size;
311 }
312 return s;
313}
314
bb198e19 315static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 316{
6b026927 317 int s, i;
1d503957 318 uint32_t *sq = ff_squareTbl + 256;
9c76bd48
BF
319
320 s = 0;
bb198e19 321 for (i = 0; i < h; i++) {
6b026927
FH
322 s += sq[pix1[ 0] - pix2[ 0]];
323 s += sq[pix1[ 1] - pix2[ 1]];
324 s += sq[pix1[ 2] - pix2[ 2]];
325 s += sq[pix1[ 3] - pix2[ 3]];
326 s += sq[pix1[ 4] - pix2[ 4]];
327 s += sq[pix1[ 5] - pix2[ 5]];
328 s += sq[pix1[ 6] - pix2[ 6]];
329 s += sq[pix1[ 7] - pix2[ 7]];
330 s += sq[pix1[ 8] - pix2[ 8]];
331 s += sq[pix1[ 9] - pix2[ 9]];
332 s += sq[pix1[10] - pix2[10]];
333 s += sq[pix1[11] - pix2[11]];
334 s += sq[pix1[12] - pix2[12]];
335 s += sq[pix1[13] - pix2[13]];
336 s += sq[pix1[14] - pix2[14]];
337 s += sq[pix1[15] - pix2[15]];
2a006cd3 338
6b026927
FH
339 pix1 += line_size;
340 pix2 += line_size;
9c76bd48
BF
341 }
342 return s;
343}
344
26efc54e 345
b250f9c6 346#if CONFIG_SNOW_ENCODER //dwt is in snow.c
/**
 * Wavelet-domain comparison of two w x h blocks.
 *
 * The sample differences are scaled by 16, stored in a 32-int-stride
 * scratch buffer and passed to ff_spatial_dwt(). The absolute values of the
 * resulting coefficients are then summed, each weighted by the scale[]
 * entry of its decomposition level and orientation.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size row stride in bytes, shared by both blocks
 * @param w         block width; 8 selects 3 decomposition levels, any other
 *                  value 4. The scratch buffer limits w and h to 32.
 * @param h         block height; must equal w
 * @param type      first index into scale[] (0: 9/7 weights, 1: 5/3 weights),
 *                  also forwarded to ff_spatial_dwt()
 * @return weighted coefficient sum, shifted right by 9
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    /* the subband walk below derives every extent from w only, so check
     * the square-block requirement before doing any work */
    assert(w==h);

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            /* multiply instead of <<4: the difference can be negative and
             * left-shifting a negative int is undefined behaviour */
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])*16;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])*16;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])*16;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])*16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    for(level=0; level<dec_count; level++){
        /* orientation 0 is only visited at level 0 */
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    /* named coef so it no longer shadows parameter v */
                    int coef= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(coef);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
415
/** Compare callback: w_c() with block width 8 and type 1 (the 5/3 scale entries). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 8;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
419
/** Compare callback: w_c() with block width 8 and type 0 (the 9/7 scale entries). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 8;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
423
/** Compare callback: w_c() with block width 16 and type 1 (the 5/3 scale entries). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 16;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
427
/** Compare callback: w_c() with block width 16 and type 0 (the 9/7 scale entries). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 16;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
431
/** Non-static compare function: w_c() with block width 32 and type 1 (the 5/3 scale entries). */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 32;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
435
/** Non-static compare function: w_c() with block width 32 and type 0 (the 9/7 scale entries). */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 32;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
3a6fc8fa 439#endif
871371a7 440
5a6a9e78
AJ
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
/* The caller must provide w writable rows above and below the image and
 * w writable bytes to the left and right of every row. */
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int n;

    /* replicate the first row upwards and the last row downwards */
    for (n = 1; n <= w; n++) {
        memcpy(buf    - n * wrap, buf,    width);
        memcpy(bottom + n * wrap, bottom, width);
    }

    /* extend every image row to the left and to the right */
    row = buf;
    for (n = 0; n < height; n++, row += wrap) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
    }

    /* fill the four w x w corners with the nearest image corner sample */
    for (n = 1; n <= w; n++) {
        memset(buf    - n * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - n * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + n * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + n * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
469
288a44fb
AJ
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int col, row;
    int top, left, bottom, right;
    int avail;

    /* a block lying entirely outside the picture is moved so that it
     * overlaps the picture by exactly one row / column */
    if(src_y >= h){
        src  += (h-1-src_y)*linesize;
        src_y = h-1;
    }else if(src_y <= -block_h){
        src  += (1-block_h-src_y)*linesize;
        src_y = 1-block_h;
    }
    if(src_x >= w){
        src  += (w-1-src_x);
        src_x = w-1;
    }else if(src_x <= -block_w){
        src  += (1-block_w-src_x);
        src_x = 1-block_w;
    }

    /* [top,bottom) x [left,right) is the part of the block inside the picture */
    top    = src_y < 0 ? -src_y : 0;
    left   = src_x < 0 ? -src_x : 0;
    avail  = h - src_y;
    bottom = avail < block_h ? avail : block_h;
    avail  = w - src_x;
    right  = avail < block_w ? avail : block_w;

    /* copy the part that exists in the source */
    for(row=top; row<bottom; row++){
        const int off = row*linesize;

        for(col=left; col<right; col++)
            buf[col + off]= src[col + off];
    }

    /* rows above the picture repeat the first copied row */
    for(row=0; row<top; row++){
        for(col=left; col<right; col++)
            buf[col + row*linesize]= buf[col + top*linesize];
    }

    /* rows below the picture repeat the last copied row */
    for(row=bottom; row<block_h; row++){
        for(col=left; col<right; col++)
            buf[col + row*linesize]= buf[col + (bottom-1)*linesize];
    }

    /* finally widen every row of the block to the left and to the right */
    for(row=0; row<block_h; row++){
        uint8_t *line = buf + row*linesize;

        for(col=0; col<left; col++)
            line[col]= line[left];

        for(col=right; col<block_w; col++)
            line[col]= line[right - 1];
    }
}
540
0c1a9eda 541static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 542{
de6d9b64
FB
543 int i;
544
545 /* read the pixels */
de6d9b64 546 for(i=0;i<8;i++) {
c13e1abd
FH
547 block[0] = pixels[0];
548 block[1] = pixels[1];
549 block[2] = pixels[2];
550 block[3] = pixels[3];
551 block[4] = pixels[4];
552 block[5] = pixels[5];
553 block[6] = pixels[6];
554 block[7] = pixels[7];
555 pixels += line_size;
556 block += 8;
de6d9b64
FB
557 }
558}
559
0c1a9eda 560static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
bb270c08 561 const uint8_t *s2, int stride){
9dbcbd92
MN
562 int i;
563
564 /* read the pixels */
9dbcbd92 565 for(i=0;i<8;i++) {
c13e1abd
FH
566 block[0] = s1[0] - s2[0];
567 block[1] = s1[1] - s2[1];
568 block[2] = s1[2] - s2[2];
569 block[3] = s1[3] - s2[3];
570 block[4] = s1[4] - s2[4];
571 block[5] = s1[5] - s2[5];
572 block[6] = s1[6] - s2[6];
573 block[7] = s1[7] - s2[7];
9dbcbd92
MN
574 s1 += stride;
575 s2 += stride;
c13e1abd 576 block += 8;
9dbcbd92
MN
577 }
578}
579
580
0c1a9eda 581static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 582 int line_size)
de6d9b64 583{
de6d9b64 584 int i;
55fde95e 585 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 586
de6d9b64 587 /* read the pixels */
de6d9b64 588 for(i=0;i<8;i++) {
c13e1abd
FH
589 pixels[0] = cm[block[0]];
590 pixels[1] = cm[block[1]];
591 pixels[2] = cm[block[2]];
592 pixels[3] = cm[block[3]];
593 pixels[4] = cm[block[4]];
594 pixels[5] = cm[block[5]];
595 pixels[6] = cm[block[6]];
596 pixels[7] = cm[block[7]];
597
598 pixels += line_size;
599 block += 8;
de6d9b64
FB
600 }
601}
602
178fcca8 603static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 604 int line_size)
178fcca8
MN
605{
606 int i;
55fde95e 607 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 608
178fcca8
MN
609 /* read the pixels */
610 for(i=0;i<4;i++) {
611 pixels[0] = cm[block[0]];
612 pixels[1] = cm[block[1]];
613 pixels[2] = cm[block[2]];
614 pixels[3] = cm[block[3]];
615
616 pixels += line_size;
617 block += 8;
618 }
619}
620
9ca358b9 621static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 622 int line_size)
9ca358b9
MN
623{
624 int i;
55fde95e 625 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 626
9ca358b9
MN
627 /* read the pixels */
628 for(i=0;i<2;i++) {
629 pixels[0] = cm[block[0]];
630 pixels[1] = cm[block[1]];
631
632 pixels += line_size;
633 block += 8;
634 }
635}
636
115329f1 637static void put_signed_pixels_clamped_c(const DCTELEM *block,
f9ed9d85
MM
638 uint8_t *restrict pixels,
639 int line_size)
640{
641 int i, j;
642
643 for (i = 0; i < 8; i++) {
644 for (j = 0; j < 8; j++) {
645 if (*block < -128)
646 *pixels = 0;
647 else if (*block > 127)
648 *pixels = 255;
649 else
650 *pixels = (uint8_t)(*block + 128);
651 block++;
652 pixels++;
653 }
654 pixels += (line_size - 8);
655 }
656}
657
0c1a9eda 658static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 659 int line_size)
de6d9b64 660{
de6d9b64 661 int i;
55fde95e 662 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 663
de6d9b64 664 /* read the pixels */
de6d9b64 665 for(i=0;i<8;i++) {
c13e1abd
FH
666 pixels[0] = cm[pixels[0] + block[0]];
667 pixels[1] = cm[pixels[1] + block[1]];
668 pixels[2] = cm[pixels[2] + block[2]];
669 pixels[3] = cm[pixels[3] + block[3]];
670 pixels[4] = cm[pixels[4] + block[4]];
671 pixels[5] = cm[pixels[5] + block[5]];
672 pixels[6] = cm[pixels[6] + block[6]];
673 pixels[7] = cm[pixels[7] + block[7]];
674 pixels += line_size;
675 block += 8;
de6d9b64
FB
676 }
677}
178fcca8
MN
678
679static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
680 int line_size)
681{
682 int i;
55fde95e 683 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 684
178fcca8
MN
685 /* read the pixels */
686 for(i=0;i<4;i++) {
687 pixels[0] = cm[pixels[0] + block[0]];
688 pixels[1] = cm[pixels[1] + block[1]];
689 pixels[2] = cm[pixels[2] + block[2]];
690 pixels[3] = cm[pixels[3] + block[3]];
691 pixels += line_size;
692 block += 8;
693 }
694}
9ca358b9
MN
695
696static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
697 int line_size)
698{
699 int i;
55fde95e 700 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 701
9ca358b9
MN
702 /* read the pixels */
703 for(i=0;i<2;i++) {
704 pixels[0] = cm[pixels[0] + block[0]];
705 pixels[1] = cm[pixels[1] + block[1]];
706 pixels += line_size;
707 block += 8;
708 }
709}
36940eca
LM
710
711static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
712{
713 int i;
714 for(i=0;i<8;i++) {
715 pixels[0] += block[0];
716 pixels[1] += block[1];
717 pixels[2] += block[2];
718 pixels[3] += block[3];
719 pixels[4] += block[4];
720 pixels[5] += block[5];
721 pixels[6] += block[6];
722 pixels[7] += block[7];
723 pixels += line_size;
724 block += 8;
725 }
726}
727
728static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
729{
730 int i;
731 for(i=0;i<4;i++) {
732 pixels[0] += block[0];
733 pixels[1] += block[1];
734 pixels[2] += block[2];
735 pixels[3] += block[3];
736 pixels += line_size;
737 block += 4;
738 }
739}
740
1edbfe19
LM
741static int sum_abs_dctelem_c(DCTELEM *block)
742{
743 int sum=0, i;
744 for(i=0; i<64; i++)
745 sum+= FFABS(block[i]);
746 return sum;
747}
748
59fe111e
MN
749#if 0
750
751#define PIXOP2(OPNAME, OP) \
b3184779 752static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
753{\
754 int i;\
755 for(i=0; i<h; i++){\
905694d9 756 OP(*((uint64_t*)block), AV_RN64(pixels));\
59fe111e
MN
757 pixels+=line_size;\
758 block +=line_size;\
759 }\
760}\
761\
45553457 762static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
763{\
764 int i;\
765 for(i=0; i<h; i++){\
905694d9
RS
766 const uint64_t a= AV_RN64(pixels );\
767 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
768 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
769 pixels+=line_size;\
770 block +=line_size;\
771 }\
772}\
773\
45553457 774static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
775{\
776 int i;\
777 for(i=0; i<h; i++){\
905694d9
RS
778 const uint64_t a= AV_RN64(pixels );\
779 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
780 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
781 pixels+=line_size;\
782 block +=line_size;\
783 }\
784}\
785\
45553457 786static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
787{\
788 int i;\
789 for(i=0; i<h; i++){\
905694d9
RS
790 const uint64_t a= AV_RN64(pixels );\
791 const uint64_t b= AV_RN64(pixels+line_size);\
59fe111e
MN
792 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
793 pixels+=line_size;\
794 block +=line_size;\
795 }\
796}\
797\
45553457 798static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
799{\
800 int i;\
801 for(i=0; i<h; i++){\
905694d9
RS
802 const uint64_t a= AV_RN64(pixels );\
803 const uint64_t b= AV_RN64(pixels+line_size);\
59fe111e
MN
804 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
805 pixels+=line_size;\
806 block +=line_size;\
807 }\
808}\
809\
45553457 810static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
811{\
812 int i;\
905694d9
RS
813 const uint64_t a= AV_RN64(pixels );\
814 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
815 uint64_t l0= (a&0x0303030303030303ULL)\
816 + (b&0x0303030303030303ULL)\
817 + 0x0202020202020202ULL;\
818 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
819 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
820 uint64_t l1,h1;\
821\
822 pixels+=line_size;\
823 for(i=0; i<h; i+=2){\
905694d9
RS
824 uint64_t a= AV_RN64(pixels );\
825 uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
826 l1= (a&0x0303030303030303ULL)\
827 + (b&0x0303030303030303ULL);\
828 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
829 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
830 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
831 pixels+=line_size;\
832 block +=line_size;\
905694d9
RS
833 a= AV_RN64(pixels );\
834 b= AV_RN64(pixels+1);\
59fe111e
MN
835 l0= (a&0x0303030303030303ULL)\
836 + (b&0x0303030303030303ULL)\
837 + 0x0202020202020202ULL;\
838 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
839 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
840 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
841 pixels+=line_size;\
842 block +=line_size;\
843 }\
844}\
845\
45553457 846static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
847{\
848 int i;\
905694d9
RS
849 const uint64_t a= AV_RN64(pixels );\
850 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
851 uint64_t l0= (a&0x0303030303030303ULL)\
852 + (b&0x0303030303030303ULL)\
853 + 0x0101010101010101ULL;\
854 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
855 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
856 uint64_t l1,h1;\
857\
858 pixels+=line_size;\
859 for(i=0; i<h; i+=2){\
905694d9
RS
860 uint64_t a= AV_RN64(pixels );\
861 uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
862 l1= (a&0x0303030303030303ULL)\
863 + (b&0x0303030303030303ULL);\
864 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
865 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
866 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
867 pixels+=line_size;\
868 block +=line_size;\
905694d9
RS
869 a= AV_RN64(pixels );\
870 b= AV_RN64(pixels+1);\
59fe111e
MN
871 l0= (a&0x0303030303030303ULL)\
872 + (b&0x0303030303030303ULL)\
873 + 0x0101010101010101ULL;\
874 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
875 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
876 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
877 pixels+=line_size;\
878 block +=line_size;\
879 }\
880}\
881\
45553457
ZK
882CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
883CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
884CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
885CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
886CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
887CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
888CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
889
890#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
891#else // 64 bit variant
892
893#define PIXOP2(OPNAME, OP) \
669ac79c
MN
894static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
895 int i;\
896 for(i=0; i<h; i++){\
905694d9 897 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
669ac79c
MN
898 pixels+=line_size;\
899 block +=line_size;\
900 }\
901}\
0da71265
MN
902static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
903 int i;\
904 for(i=0; i<h; i++){\
905694d9 905 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
0da71265
MN
906 pixels+=line_size;\
907 block +=line_size;\
908 }\
909}\
45553457 910static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
59fe111e
MN
911 int i;\
912 for(i=0; i<h; i++){\
905694d9
RS
913 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
914 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
59fe111e
MN
915 pixels+=line_size;\
916 block +=line_size;\
917 }\
918}\
45553457
ZK
919static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
920 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
b3184779 921}\
59fe111e 922\
b3184779
MN
923static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
924 int src_stride1, int src_stride2, int h){\
59fe111e
MN
925 int i;\
926 for(i=0; i<h; i++){\
b3184779 927 uint32_t a,b;\
905694d9
RS
928 a= AV_RN32(&src1[i*src_stride1 ]);\
929 b= AV_RN32(&src2[i*src_stride2 ]);\
d8085ea7 930 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
905694d9
RS
931 a= AV_RN32(&src1[i*src_stride1+4]);\
932 b= AV_RN32(&src2[i*src_stride2+4]);\
d8085ea7 933 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
59fe111e
MN
934 }\
935}\
936\
b3184779
MN
937static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
938 int src_stride1, int src_stride2, int h){\
59fe111e
MN
939 int i;\
940 for(i=0; i<h; i++){\
b3184779 941 uint32_t a,b;\
905694d9
RS
942 a= AV_RN32(&src1[i*src_stride1 ]);\
943 b= AV_RN32(&src2[i*src_stride2 ]);\
d8085ea7 944 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
905694d9
RS
945 a= AV_RN32(&src1[i*src_stride1+4]);\
946 b= AV_RN32(&src2[i*src_stride2+4]);\
d8085ea7 947 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
59fe111e
MN
948 }\
949}\
950\
0da71265
MN
951static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
952 int src_stride1, int src_stride2, int h){\
953 int i;\
954 for(i=0; i<h; i++){\
955 uint32_t a,b;\
905694d9
RS
956 a= AV_RN32(&src1[i*src_stride1 ]);\
957 b= AV_RN32(&src2[i*src_stride2 ]);\
d8085ea7 958 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
0da71265
MN
959 }\
960}\
961\
669ac79c
MN
962static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
963 int src_stride1, int src_stride2, int h){\
964 int i;\
965 for(i=0; i<h; i++){\
966 uint32_t a,b;\
905694d9
RS
967 a= AV_RN16(&src1[i*src_stride1 ]);\
968 b= AV_RN16(&src2[i*src_stride2 ]);\
669ac79c
MN
969 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
970 }\
971}\
972\
b3184779
MN
973static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
974 int src_stride1, int src_stride2, int h){\
975 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
976 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
977}\
978\
979static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
980 int src_stride1, int src_stride2, int h){\
981 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
982 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
983}\
984\
45553457 985static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
986 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
987}\
988\
45553457 989static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
990 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
991}\
992\
45553457 993static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
994 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
995}\
996\
45553457 997static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
998 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
999}\
1000\
1001static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1002 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
1003 int i;\
1004 for(i=0; i<h; i++){\
b3184779 1005 uint32_t a, b, c, d, l0, l1, h0, h1;\
905694d9
RS
1006 a= AV_RN32(&src1[i*src_stride1]);\
1007 b= AV_RN32(&src2[i*src_stride2]);\
1008 c= AV_RN32(&src3[i*src_stride3]);\
1009 d= AV_RN32(&src4[i*src_stride4]);\
b3184779
MN
1010 l0= (a&0x03030303UL)\
1011 + (b&0x03030303UL)\
1012 + 0x02020202UL;\
1013 h0= ((a&0xFCFCFCFCUL)>>2)\
1014 + ((b&0xFCFCFCFCUL)>>2);\
1015 l1= (c&0x03030303UL)\
1016 + (d&0x03030303UL);\
1017 h1= ((c&0xFCFCFCFCUL)>>2)\
1018 + ((d&0xFCFCFCFCUL)>>2);\
1019 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
905694d9
RS
1020 a= AV_RN32(&src1[i*src_stride1+4]);\
1021 b= AV_RN32(&src2[i*src_stride2+4]);\
1022 c= AV_RN32(&src3[i*src_stride3+4]);\
1023 d= AV_RN32(&src4[i*src_stride4+4]);\
b3184779
MN
1024 l0= (a&0x03030303UL)\
1025 + (b&0x03030303UL)\
1026 + 0x02020202UL;\
1027 h0= ((a&0xFCFCFCFCUL)>>2)\
1028 + ((b&0xFCFCFCFCUL)>>2);\
1029 l1= (c&0x03030303UL)\
1030 + (d&0x03030303UL);\
1031 h1= ((c&0xFCFCFCFCUL)>>2)\
1032 + ((d&0xFCFCFCFCUL)>>2);\
1033 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
1034 }\
1035}\
669ac79c
MN
1036\
1037static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1038 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1039}\
1040\
1041static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1042 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1043}\
1044\
1045static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1046 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1047}\
1048\
1049static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1050 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1051}\
1052\
b3184779
MN
1053static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1054 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
1055 int i;\
1056 for(i=0; i<h; i++){\
b3184779 1057 uint32_t a, b, c, d, l0, l1, h0, h1;\
905694d9
RS
1058 a= AV_RN32(&src1[i*src_stride1]);\
1059 b= AV_RN32(&src2[i*src_stride2]);\
1060 c= AV_RN32(&src3[i*src_stride3]);\
1061 d= AV_RN32(&src4[i*src_stride4]);\
b3184779
MN
1062 l0= (a&0x03030303UL)\
1063 + (b&0x03030303UL)\
1064 + 0x01010101UL;\
1065 h0= ((a&0xFCFCFCFCUL)>>2)\
1066 + ((b&0xFCFCFCFCUL)>>2);\
1067 l1= (c&0x03030303UL)\
1068 + (d&0x03030303UL);\
1069 h1= ((c&0xFCFCFCFCUL)>>2)\
1070 + ((d&0xFCFCFCFCUL)>>2);\
1071 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
905694d9
RS
1072 a= AV_RN32(&src1[i*src_stride1+4]);\
1073 b= AV_RN32(&src2[i*src_stride2+4]);\
1074 c= AV_RN32(&src3[i*src_stride3+4]);\
1075 d= AV_RN32(&src4[i*src_stride4+4]);\
b3184779
MN
1076 l0= (a&0x03030303UL)\
1077 + (b&0x03030303UL)\
1078 + 0x01010101UL;\
1079 h0= ((a&0xFCFCFCFCUL)>>2)\
1080 + ((b&0xFCFCFCFCUL)>>2);\
1081 l1= (c&0x03030303UL)\
1082 + (d&0x03030303UL);\
1083 h1= ((c&0xFCFCFCFCUL)>>2)\
1084 + ((d&0xFCFCFCFCUL)>>2);\
1085 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
1086 }\
1087}\
b3184779
MN
1088static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1089 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1090 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1091 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1092}\
1093static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1094 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1095 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1096 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1097}\
59fe111e 1098\
669ac79c
MN
1099static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1100{\
1101 int i, a0, b0, a1, b1;\
1102 a0= pixels[0];\
1103 b0= pixels[1] + 2;\
1104 a0 += b0;\
1105 b0 += pixels[2];\
1106\
1107 pixels+=line_size;\
1108 for(i=0; i<h; i+=2){\
1109 a1= pixels[0];\
1110 b1= pixels[1];\
1111 a1 += b1;\
1112 b1 += pixels[2];\
1113\
1114 block[0]= (a1+a0)>>2; /* FIXME non put */\
1115 block[1]= (b1+b0)>>2;\
1116\
1117 pixels+=line_size;\
1118 block +=line_size;\
1119\
1120 a0= pixels[0];\
1121 b0= pixels[1] + 2;\
1122 a0 += b0;\
1123 b0 += pixels[2];\
1124\
1125 block[0]= (a1+a0)>>2;\
1126 block[1]= (b1+b0)>>2;\
1127 pixels+=line_size;\
1128 block +=line_size;\
1129 }\
1130}\
1131\
1132static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1133{\
1134 int i;\
905694d9
RS
1135 const uint32_t a= AV_RN32(pixels );\
1136 const uint32_t b= AV_RN32(pixels+1);\
669ac79c
MN
1137 uint32_t l0= (a&0x03030303UL)\
1138 + (b&0x03030303UL)\
1139 + 0x02020202UL;\
1140 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1141 + ((b&0xFCFCFCFCUL)>>2);\
1142 uint32_t l1,h1;\
1143\
1144 pixels+=line_size;\
1145 for(i=0; i<h; i+=2){\
905694d9
RS
1146 uint32_t a= AV_RN32(pixels );\
1147 uint32_t b= AV_RN32(pixels+1);\
669ac79c
MN
1148 l1= (a&0x03030303UL)\
1149 + (b&0x03030303UL);\
1150 h1= ((a&0xFCFCFCFCUL)>>2)\
1151 + ((b&0xFCFCFCFCUL)>>2);\
1152 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1153 pixels+=line_size;\
1154 block +=line_size;\
905694d9
RS
1155 a= AV_RN32(pixels );\
1156 b= AV_RN32(pixels+1);\
669ac79c
MN
1157 l0= (a&0x03030303UL)\
1158 + (b&0x03030303UL)\
1159 + 0x02020202UL;\
1160 h0= ((a&0xFCFCFCFCUL)>>2)\
1161 + ((b&0xFCFCFCFCUL)>>2);\
1162 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1163 pixels+=line_size;\
1164 block +=line_size;\
1165 }\
1166}\
1167\
45553457 1168static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
1169{\
1170 int j;\
1171 for(j=0; j<2; j++){\
1172 int i;\
905694d9
RS
1173 const uint32_t a= AV_RN32(pixels );\
1174 const uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1175 uint32_t l0= (a&0x03030303UL)\
1176 + (b&0x03030303UL)\
1177 + 0x02020202UL;\
1178 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1179 + ((b&0xFCFCFCFCUL)>>2);\
1180 uint32_t l1,h1;\
1181\
1182 pixels+=line_size;\
1183 for(i=0; i<h; i+=2){\
905694d9
RS
1184 uint32_t a= AV_RN32(pixels );\
1185 uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1186 l1= (a&0x03030303UL)\
1187 + (b&0x03030303UL);\
1188 h1= ((a&0xFCFCFCFCUL)>>2)\
1189 + ((b&0xFCFCFCFCUL)>>2);\
1190 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1191 pixels+=line_size;\
1192 block +=line_size;\
905694d9
RS
1193 a= AV_RN32(pixels );\
1194 b= AV_RN32(pixels+1);\
59fe111e
MN
1195 l0= (a&0x03030303UL)\
1196 + (b&0x03030303UL)\
1197 + 0x02020202UL;\
1198 h0= ((a&0xFCFCFCFCUL)>>2)\
1199 + ((b&0xFCFCFCFCUL)>>2);\
1200 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1201 pixels+=line_size;\
1202 block +=line_size;\
1203 }\
1204 pixels+=4-line_size*(h+1);\
1205 block +=4-line_size*h;\
1206 }\
1207}\
1208\
45553457 1209static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
1210{\
1211 int j;\
1212 for(j=0; j<2; j++){\
1213 int i;\
905694d9
RS
1214 const uint32_t a= AV_RN32(pixels );\
1215 const uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1216 uint32_t l0= (a&0x03030303UL)\
1217 + (b&0x03030303UL)\
1218 + 0x01010101UL;\
1219 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1220 + ((b&0xFCFCFCFCUL)>>2);\
1221 uint32_t l1,h1;\
1222\
1223 pixels+=line_size;\
1224 for(i=0; i<h; i+=2){\
905694d9
RS
1225 uint32_t a= AV_RN32(pixels );\
1226 uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1227 l1= (a&0x03030303UL)\
1228 + (b&0x03030303UL);\
1229 h1= ((a&0xFCFCFCFCUL)>>2)\
1230 + ((b&0xFCFCFCFCUL)>>2);\
1231 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1232 pixels+=line_size;\
1233 block +=line_size;\
905694d9
RS
1234 a= AV_RN32(pixels );\
1235 b= AV_RN32(pixels+1);\
59fe111e
MN
1236 l0= (a&0x03030303UL)\
1237 + (b&0x03030303UL)\
1238 + 0x01010101UL;\
1239 h0= ((a&0xFCFCFCFCUL)>>2)\
1240 + ((b&0xFCFCFCFCUL)>>2);\
1241 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1242 pixels+=line_size;\
1243 block +=line_size;\
1244 }\
1245 pixels+=4-line_size*(h+1);\
1246 block +=4-line_size*h;\
1247 }\
1248}\
1249\
45553457
ZK
1250CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
1251CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1252CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1253CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1254CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
1255CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1256CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1257CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
b3184779 1258
d8085ea7 1259#define op_avg(a, b) a = rnd_avg32(a, b)
59fe111e 1260#endif
59fe111e
MN
1261#define op_put(a, b) a = b
1262
1263PIXOP2(avg, op_avg)
1264PIXOP2(put, op_put)
1265#undef op_avg
1266#undef op_put
1267
de6d9b64
FB
/*
 * Rounding averages of two and of four values.
 * Every argument is parenthesized so that expressions with low-precedence
 * operators (e.g. `x|y`) are averaged as a whole; each argument is still
 * evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
1270
c0a0170c
MN
/**
 * Store the non-rounding average of two 16-byte-wide blocks.
 * Thin wrapper around put_no_rnd_pixels16_l2() that uses one common stride
 * for the destination and both sources.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride line stride shared by dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
1274
/**
 * Store the non-rounding average of two 8-byte-wide blocks.
 * Thin wrapper around put_no_rnd_pixels8_l2() that uses one common stride
 * for the destination and both sources.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride line stride shared by dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
073b013d 1278
/**
 * Bilinear interpolation of an 8-pixel-wide block at a 1/16-pel offset.
 *
 * Each output pixel is the weighted sum of its 2x2 source neighbourhood;
 * the four weights add up to 256, hence the final shift by 8.
 * Reads 9 pixels per row from h+1 source rows.
 *
 * @param dst     destination, 8 pixels written per row
 * @param src     top-left source pixel
 * @param stride  line stride of both dst and src
 * @param h       number of rows
 * @param x16     horizontal fraction in 1/16 pixel units
 * @param y16     vertical fraction in 1/16 pixel units
 * @param rounder value added before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i, j;

    for(i=0; i<h; i++)
    {
        for(j=0; j<8; j++)
            dst[j]= (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
1301
/**
 * Warp an 8-pixel-wide block with an affine motion field.
 *
 * For every destination pixel a source position (vx, vy) is tracked in
 * fixed point: after dropping the low 16 bits, the lowest `shift` bits are
 * the sub-pixel fraction and the remaining bits the integer coordinate.
 * Positions strictly inside the picture are bilinearly interpolated;
 * positions outside are clamped, and interpolation is dropped along each
 * axis that had to be clamped.
 *
 * @param dst     destination, 8 pixels written per row
 * @param src     source picture origin
 * @param stride  line stride of both dst and src
 * @param h       number of destination rows
 * @param ox,oy   source position of the first destination pixel
 * @param dxx,dyx increment of (vx, vy) per destination column
 * @param dxy,dyy increment of (ox, oy) per destination row
 * @param shift   number of fractional bits in the sub-pixel position
 * @param r       value added before the final shift by 2*shift
 * @param width   source width
 * @param height  source height
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* from here on width/height are the inclusive upper clamp bounds */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* the unsigned compare rejects negative coordinates as well */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* row clamped: horizontal interpolation only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* column clamped: vertical interpolation only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both clamped: copy the nearest pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
1359
/**
 * Third-pel MC, position (0,0): plain block copy.
 * Dispatches on width (2, 4, 8 or 16) to the matching put_pixelsN_c();
 * any other width leaves dst untouched.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    default: break;
    }
}
1368
/**
 * Third-pel MC, horizontal weights 2:1.
 * dst = (2*src[x] + src[x+1]) / 3, with the division replaced by a
 * multiply by 683 and a shift by 11 (683/2048 ~= 1/3).
 * Reads width+1 pixels per row.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (683*(2*src[x] + src[x+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
1379
/**
 * Third-pel MC, horizontal weights 1:2.
 * dst = (src[x] + 2*src[x+1]) / 3, with the division replaced by a
 * multiply by 683 and a shift by 11 (683/2048 ~= 1/3).
 * Reads width+1 pixels per row.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (683*(src[x] + 2*src[x+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1390
669ac79c
MN
/**
 * Third-pel MC, vertical weights 2:1.
 * dst = (2*src[x] + src[x+stride]) / 3, with the division replaced by a
 * multiply by 683 and a shift by 11 (683/2048 ~= 1/3).
 * Reads height+1 source rows.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (683*(2*src[x] + src[x+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1401
669ac79c
MN
/**
 * Third-pel MC, 2-D weights 4:3:3:2 over the 2x2 neighbourhood
 * (top-left, top-right, bottom-left, bottom-right).
 * The weights sum to 12; the division by 12 is a multiply by 2731 and a
 * shift by 15 (2731/32768 ~= 1/12).
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (2731*(4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1412
/**
 * Third-pel MC, 2-D weights 3:2:4:3 over the 2x2 neighbourhood
 * (top-left, top-right, bottom-left, bottom-right).
 * The weights sum to 12; the division by 12 is a multiply by 2731 and a
 * shift by 15 (2731/32768 ~= 1/12).
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (2731*(3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1423
/**
 * Third-pel MC, vertical weights 1:2.
 * dst = (src[x] + 2*src[x+stride]) / 3, with the division replaced by a
 * multiply by 683 and a shift by 11 (683/2048 ~= 1/3).
 * Reads height+1 source rows.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (683*(src[x] + 2*src[x+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
1434
/**
 * Third-pel MC, 2-D weights 3:4:2:3 over the 2x2 neighbourhood
 * (top-left, top-right, bottom-left, bottom-right).
 * The weights sum to 12; the division by 12 is a multiply by 2731 and a
 * shift by 15 (2731/32768 ~= 1/12).
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (2731*(3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1445
/**
 * Third-pel MC, 2-D weights 2:3:3:4 over the 2x2 neighbourhood
 * (top-left, top-right, bottom-left, bottom-right).
 * The weights sum to 12; the division by 12 is a multiply by 2731 and a
 * shift by 15 (2731/32768 ~= 1/12).
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (2731*(2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
da3b9756
MM
1456
/**
 * Third-pel MC, position (0,0), averaging variant.
 * Dispatches on width (2, 4, 8 or 16) to the matching avg_pixelsN_c();
 * any other width leaves dst untouched.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    default: break;
    }
}
1465
/**
 * Third-pel MC, horizontal weights 2:1, averaged into dst.
 * The interpolated value (2*src[x] + src[x+1]) / 3 (computed as a multiply
 * by 683 and a shift by 11) is combined with the existing dst pixel using a
 * rounding average.
 * Reads width+1 pixels per row.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((683*(2*src[x] + src[x+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1476
/**
 * Third-pel MC, horizontal weights 1:2, averaged into dst.
 * The interpolated value (src[x] + 2*src[x+1]) / 3 (computed as a multiply
 * by 683 and a shift by 11) is combined with the existing dst pixel using a
 * rounding average.
 * Reads width+1 pixels per row.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((683*(src[x] + 2*src[x+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1487
da3b9756
MM
/**
 * Third-pel MC, vertical weights 2:1, averaged into dst.
 * The interpolated value (2*src[x] + src[x+stride]) / 3 (computed as a
 * multiply by 683 and a shift by 11) is combined with the existing dst
 * pixel using a rounding average.
 * Reads height+1 source rows.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((683*(2*src[x] + src[x+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1498
da3b9756
MM
/**
 * Third-pel MC, 2-D weights 4:3:3:2 (top-left, top-right, bottom-left,
 * bottom-right), averaged into dst.
 * The weighted sum is divided by 12 via a multiply by 2731 and a shift by
 * 15, then combined with the existing dst pixel using a rounding average.
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((2731*(4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1509
/**
 * Third-pel MC, 2-D weights 3:2:4:3 (top-left, top-right, bottom-left,
 * bottom-right), averaged into dst.
 * The weighted sum is divided by 12 via a multiply by 2731 and a shift by
 * 15, then combined with the existing dst pixel using a rounding average.
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((2731*(3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1520
/**
 * Third-pel MC, vertical weights 1:2, averaged into dst.
 * The interpolated value (src[x] + 2*src[x+stride]) / 3 (computed as a
 * multiply by 683 and a shift by 11) is combined with the existing dst
 * pixel using a rounding average.
 * Reads height+1 source rows.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((683*(src[x] + 2*src[x+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1531
/**
 * Third-pel MC, 2-D weights 3:4:2:3 (top-left, top-right, bottom-left,
 * bottom-right), averaged into dst.
 * The weighted sum is divided by 12 via a multiply by 2731 and a shift by
 * 15, then combined with the existing dst pixel using a rounding average.
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((2731*(3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1542
/**
 * Third-pel MC, 2-D weights 2:3:3:4 (top-left, top-right, bottom-left,
 * bottom-right), averaged into dst.
 * The weighted sum is divided by 12 via a multiply by 2731 and a shift by
 * 15, then combined with the existing dst pixel using a rounding average.
 * Reads width+1 pixels from height+1 source rows.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            dst[x] = (dst[x] + ((2731*(2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
669ac79c
MN
#if 0
/*
 * Disabled: TPEL_WIDTH(width) would generate fixed-width wrappers
 * put_tpel_pixels<width>_mcXY_c() around the generic put_tpel_pixels_mcXY_c()
 * helpers. The wrapper bodies are plain calls (no leading `void`), so the
 * macro is valid C should it ever be enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1574
0da71265
MN
/*
 * H264_CHROMA_MC(OPNAME, OP) generates OPNAME##h264_chroma_mc{2,4,8}_c():
 * bilinear chroma interpolation of a 2-, 4- or 8-pixel-wide block at the
 * 1/8-pel offset (x, y), 0 <= x,y < 8.
 *
 * The four weights A..D sum to 64; OP(dst, sum) is expected to normalise
 * the weighted sum and store it.
 * When D == 0 at least one of x and y is zero, so at most one of B and C is
 * non-zero; the filter then needs only two taps, with `step` selecting the
 * vertical (stride) or horizontal (1) neighbour.
 * The general path reads width+1 pixels from h+1 source rows.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            for(j=0; j<2; j++){\
                OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            for(j=0; j<2; j++){\
                OP(dst[j], (A*src[j] + E*src[step+j]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            for(j=0; j<4; j++){\
                OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            for(j=0; j<4; j++){\
                OP(dst[j], (A*src[j] + E*src[step+j]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            for(j=0; j<8; j++){\
                OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            for(j=0; j<8; j++){\
                OP(dst[j], (A*src[j] + E*src[step+j]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* Normalise the 6-bit-scaled sum with rounding; avg additionally takes the
 * rounding average with the pixel already in dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1683
/**
 * VC-1 "no rounding" bilinear chroma interpolation of an 8-pixel-wide block
 * at the 1/8-pel offset (x, y), 0 <= x,y < 8.
 *
 * The four weights sum to 64; the bias added before the shift by 6 is
 * 32 - 4 = 28 instead of the usual 32.
 * Reads 9 pixels per row from h+1 source rows.
 *
 * @param dst    destination, 8 pixels written per row
 * @param src    top-left source pixel
 * @param stride line stride of both dst and src
 * @param h      number of rows
 * @param x      horizontal fraction in 1/8 pixel units
 * @param y      vertical fraction in 1/8 pixel units
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        for(j=0; j<8; j++)
            dst[j] = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
        dst+= stride;
        src+= stride;
    }
}
1707
8013da73
DC
/**
 * Averaging variant of the VC-1 "no rounding" bilinear chroma interpolation
 * for an 8-pixel-wide block at the 1/8-pel offset (x, y), 0 <= x,y < 8.
 *
 * The interpolated pixel uses the bias 32 - 4 = 28 before the shift by 6;
 * it is then combined with the pixel already in dst using a rounding
 * average, (dst + pred + 1) >> 1.
 * Reads 9 pixels per row from h+1 source rows.
 *
 * @param dst    destination, 8 pixels updated per row
 * @param src    top-left source pixel
 * @param stride line stride of both dst and src
 * @param h      number of rows
 * @param x      horizontal fraction in 1/8 pixel units
 * @param y      vertical fraction in 1/8 pixel units
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        for(j=0; j<8; j++){
            const int pred = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
            dst[j] = (dst[j] + pred + 1) >> 1;
        }
        dst+= stride;
        src+= stride;
    }
}
1731
/**
 * Generates the plain-C MPEG-4 quarter-pel motion compensation functions for
 * 8x8 and 16x16 blocks.
 *
 * Macro parameters:
 *   r      - not referenced anywhere in the macro body.
 *   OPNAME - prefix pasted onto every generated function name and onto the
 *            pixels8 / pixels16 helpers that write into dst.
 *   RND    - infix pasted after "put" to select the helpers that fill the
 *            intermediate (on-stack) buffers: put ## RND ## ...
 *   OP     - store macro, used as OP(lvalue, sum), that receives each raw
 *            filter sum. Every lowpass kernel declares a local `cm`
 *            (ff_cropTbl + MAX_NEG_CROP) so that OP may refer to it.
 *
 * Lowpass kernels: each output sample is
 *     20*(inner pair) - 6*(next pair) + 3*(next pair) - (outer pair)
 * (the weights sum to 32; any scaling is left to OP). An 8-wide kernel reads
 * input samples 0..8 and a 16-wide kernel reads 0..16; taps that would fall
 * outside that range are mirrored back onto samples inside it. The _h_
 * kernels filter along a row for h rows, the _v_ kernels filter down each of
 * the 8 (or 16) columns.
 *
 * Entry points: OPNAME ## qpel{8,16}_mcXY_c(dst, src, stride). Judging from
 * which kernels each one applies, X and Y look like the horizontal and
 * vertical quarter-sample offsets (0..3) -- NOTE(review): confirm against the
 * callers. The ff_..._old_c variants are non-static and use the four-input
 * pixels*_l4 helper (mc11/31/13/33) or a separately filtered vertical buffer
 * (mc12/32) instead of the two-stage form of their static counterparts.
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
/* 8-wide horizontal lowpass over h rows; reads src[0..8] of each row */\
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    for(; h > 0; h--, dst += dstStride, src += srcStride){\
        OP(dst[0], 20*(src[0]+src[1]) - 6*(src[0]+src[2]) + 3*(src[1]+src[3]) - (src[2]+src[4]));\
        OP(dst[1], 20*(src[1]+src[2]) - 6*(src[0]+src[3]) + 3*(src[0]+src[4]) - (src[1]+src[5]));\
        OP(dst[2], 20*(src[2]+src[3]) - 6*(src[1]+src[4]) + 3*(src[0]+src[5]) - (src[0]+src[6]));\
        OP(dst[3], 20*(src[3]+src[4]) - 6*(src[2]+src[5]) + 3*(src[1]+src[6]) - (src[0]+src[7]));\
        OP(dst[4], 20*(src[4]+src[5]) - 6*(src[3]+src[6]) + 3*(src[2]+src[7]) - (src[1]+src[8]));\
        OP(dst[5], 20*(src[5]+src[6]) - 6*(src[4]+src[7]) + 3*(src[3]+src[8]) - (src[2]+src[8]));\
        OP(dst[6], 20*(src[6]+src[7]) - 6*(src[5]+src[8]) + 3*(src[4]+src[8]) - (src[3]+src[7]));\
        OP(dst[7], 20*(src[7]+src[8]) - 6*(src[6]+src[8]) + 3*(src[5]+src[7]) - (src[4]+src[6]));\
    }\
}\
\
/* 8-tall vertical lowpass over 8 columns; reads rows 0..8 of each column */\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int col;\
    for(col = 0; col < 8; col++, dst++, src++){\
        /* fetch the whole input column before any output is stored */\
        const int r0 = src[0*srcStride];\
        const int r1 = src[1*srcStride];\
        const int r2 = src[2*srcStride];\
        const int r3 = src[3*srcStride];\
        const int r4 = src[4*srcStride];\
        const int r5 = src[5*srcStride];\
        const int r6 = src[6*srcStride];\
        const int r7 = src[7*srcStride];\
        const int r8 = src[8*srcStride];\
        OP(dst[0*dstStride], 20*(r0+r1) - 6*(r0+r2) + 3*(r1+r3) - (r2+r4));\
        OP(dst[1*dstStride], 20*(r1+r2) - 6*(r0+r3) + 3*(r0+r4) - (r1+r5));\
        OP(dst[2*dstStride], 20*(r2+r3) - 6*(r1+r4) + 3*(r0+r5) - (r0+r6));\
        OP(dst[3*dstStride], 20*(r3+r4) - 6*(r2+r5) + 3*(r1+r6) - (r0+r7));\
        OP(dst[4*dstStride], 20*(r4+r5) - 6*(r3+r6) + 3*(r2+r7) - (r1+r8));\
        OP(dst[5*dstStride], 20*(r5+r6) - 6*(r4+r7) + 3*(r3+r8) - (r2+r8));\
        OP(dst[6*dstStride], 20*(r6+r7) - 6*(r5+r8) + 3*(r4+r8) - (r3+r7));\
        OP(dst[7*dstStride], 20*(r7+r8) - 6*(r6+r8) + 3*(r5+r7) - (r4+r6));\
    }\
}\
\
/* 16-wide horizontal lowpass over h rows; reads src[0..16] of each row */\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    for(; h > 0; h--, dst += dstStride, src += srcStride){\
        OP(dst[ 0], 20*(src[ 0]+src[ 1]) - 6*(src[ 0]+src[ 2]) + 3*(src[ 1]+src[ 3]) - (src[ 2]+src[ 4]));\
        OP(dst[ 1], 20*(src[ 1]+src[ 2]) - 6*(src[ 0]+src[ 3]) + 3*(src[ 0]+src[ 4]) - (src[ 1]+src[ 5]));\
        OP(dst[ 2], 20*(src[ 2]+src[ 3]) - 6*(src[ 1]+src[ 4]) + 3*(src[ 0]+src[ 5]) - (src[ 0]+src[ 6]));\
        OP(dst[ 3], 20*(src[ 3]+src[ 4]) - 6*(src[ 2]+src[ 5]) + 3*(src[ 1]+src[ 6]) - (src[ 0]+src[ 7]));\
        OP(dst[ 4], 20*(src[ 4]+src[ 5]) - 6*(src[ 3]+src[ 6]) + 3*(src[ 2]+src[ 7]) - (src[ 1]+src[ 8]));\
        OP(dst[ 5], 20*(src[ 5]+src[ 6]) - 6*(src[ 4]+src[ 7]) + 3*(src[ 3]+src[ 8]) - (src[ 2]+src[ 9]));\
        OP(dst[ 6], 20*(src[ 6]+src[ 7]) - 6*(src[ 5]+src[ 8]) + 3*(src[ 4]+src[ 9]) - (src[ 3]+src[10]));\
        OP(dst[ 7], 20*(src[ 7]+src[ 8]) - 6*(src[ 6]+src[ 9]) + 3*(src[ 5]+src[10]) - (src[ 4]+src[11]));\
        OP(dst[ 8], 20*(src[ 8]+src[ 9]) - 6*(src[ 7]+src[10]) + 3*(src[ 6]+src[11]) - (src[ 5]+src[12]));\
        OP(dst[ 9], 20*(src[ 9]+src[10]) - 6*(src[ 8]+src[11]) + 3*(src[ 7]+src[12]) - (src[ 6]+src[13]));\
        OP(dst[10], 20*(src[10]+src[11]) - 6*(src[ 9]+src[12]) + 3*(src[ 8]+src[13]) - (src[ 7]+src[14]));\
        OP(dst[11], 20*(src[11]+src[12]) - 6*(src[10]+src[13]) + 3*(src[ 9]+src[14]) - (src[ 8]+src[15]));\
        OP(dst[12], 20*(src[12]+src[13]) - 6*(src[11]+src[14]) + 3*(src[10]+src[15]) - (src[ 9]+src[16]));\
        OP(dst[13], 20*(src[13]+src[14]) - 6*(src[12]+src[15]) + 3*(src[11]+src[16]) - (src[10]+src[16]));\
        OP(dst[14], 20*(src[14]+src[15]) - 6*(src[13]+src[16]) + 3*(src[12]+src[16]) - (src[11]+src[15]));\
        OP(dst[15], 20*(src[15]+src[16]) - 6*(src[14]+src[16]) + 3*(src[13]+src[15]) - (src[12]+src[14]));\
    }\
}\
\
/* 16-tall vertical lowpass over 16 columns; reads rows 0..16 of each column */\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int col;\
    for(col = 0; col < 16; col++, dst++, src++){\
        /* fetch the whole input column before any output is stored */\
        const int r0  = src[ 0*srcStride];\
        const int r1  = src[ 1*srcStride];\
        const int r2  = src[ 2*srcStride];\
        const int r3  = src[ 3*srcStride];\
        const int r4  = src[ 4*srcStride];\
        const int r5  = src[ 5*srcStride];\
        const int r6  = src[ 6*srcStride];\
        const int r7  = src[ 7*srcStride];\
        const int r8  = src[ 8*srcStride];\
        const int r9  = src[ 9*srcStride];\
        const int r10 = src[10*srcStride];\
        const int r11 = src[11*srcStride];\
        const int r12 = src[12*srcStride];\
        const int r13 = src[13*srcStride];\
        const int r14 = src[14*srcStride];\
        const int r15 = src[15*srcStride];\
        const int r16 = src[16*srcStride];\
        OP(dst[ 0*dstStride], 20*(r0 +r1 ) - 6*(r0 +r2 ) + 3*(r1 +r3 ) - (r2 +r4 ));\
        OP(dst[ 1*dstStride], 20*(r1 +r2 ) - 6*(r0 +r3 ) + 3*(r0 +r4 ) - (r1 +r5 ));\
        OP(dst[ 2*dstStride], 20*(r2 +r3 ) - 6*(r1 +r4 ) + 3*(r0 +r5 ) - (r0 +r6 ));\
        OP(dst[ 3*dstStride], 20*(r3 +r4 ) - 6*(r2 +r5 ) + 3*(r1 +r6 ) - (r0 +r7 ));\
        OP(dst[ 4*dstStride], 20*(r4 +r5 ) - 6*(r3 +r6 ) + 3*(r2 +r7 ) - (r1 +r8 ));\
        OP(dst[ 5*dstStride], 20*(r5 +r6 ) - 6*(r4 +r7 ) + 3*(r3 +r8 ) - (r2 +r9 ));\
        OP(dst[ 6*dstStride], 20*(r6 +r7 ) - 6*(r5 +r8 ) + 3*(r4 +r9 ) - (r3 +r10));\
        OP(dst[ 7*dstStride], 20*(r7 +r8 ) - 6*(r6 +r9 ) + 3*(r5 +r10) - (r4 +r11));\
        OP(dst[ 8*dstStride], 20*(r8 +r9 ) - 6*(r7 +r10) + 3*(r6 +r11) - (r5 +r12));\
        OP(dst[ 9*dstStride], 20*(r9 +r10) - 6*(r8 +r11) + 3*(r7 +r12) - (r6 +r13));\
        OP(dst[10*dstStride], 20*(r10+r11) - 6*(r9 +r12) + 3*(r8 +r13) - (r7 +r14));\
        OP(dst[11*dstStride], 20*(r11+r12) - 6*(r10+r13) + 3*(r9 +r14) - (r8 +r15));\
        OP(dst[12*dstStride], 20*(r12+r13) - 6*(r11+r14) + 3*(r10+r15) - (r9 +r16));\
        OP(dst[13*dstStride], 20*(r13+r14) - 6*(r12+r15) + 3*(r11+r16) - (r10+r16));\
        OP(dst[14*dstStride], 20*(r14+r15) - 6*(r13+r16) + 3*(r12+r16) - (r11+r15));\
        OP(dst[15*dstStride], 20*(r15+r16) - 6*(r14+r16) + 3*(r13+r15) - (r12+r14));\
    }\
}\
\
/* ------------------------- 8x8 entry points ------------------------- */\
/* Local buffers: block = 9 rows at stride 16 copied from src,           */\
/* lpH = 9 rows x 8 of h-lowpass output, lp/lpV/lpHV = 8x8 results.      */\
\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lp[8*8];\
    put ## RND ## mpeg4_qpel8_h_lowpass(lp, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, lp, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lp[8*8];\
    put ## RND ## mpeg4_qpel8_h_lowpass(lp, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src + 1, lp, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lp[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lp, block, 8, 16);\
    OPNAME ## pixels8_l2(dst, block, lp, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    copy_block9(block, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, block, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lp[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lp, block, 8, 16);\
    /* second input starts one row (16 bytes) into the copied block */\
    OPNAME ## pixels8_l2(dst, block + 16, lp, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpV[8*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpV, block, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l4(dst, block, lpH, lpV, lpHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    /* lpH is rewritten in place from itself and the copied block */\
    put ## RND ## pixels8_l2(lpH, lpH, block, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpH, lpHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpV[8*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpV, block + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l4(dst, block + 1, lpH, lpV, lpHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## pixels8_l2(lpH, lpH, block + 1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpH, lpHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpV[8*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpV, block, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l4(dst, block + 16, lpH + 8, lpV, lpHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## pixels8_l2(lpH, lpH, block, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpH + 8, lpHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpV[8*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpV, block + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    /* block + 17: one row (16) down and one sample right */\
    OPNAME ## pixels8_l4(dst, block + 17, lpH + 8, lpV, lpHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## pixels8_l2(lpH, lpH, block + 1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpH + 8, lpHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lpH[9*8];\
    uint8_t lpHV[8*8];\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpH, lpHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lpH[9*8];\
    uint8_t lpHV[8*8];\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpH + 8, lpHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpV[8*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpV, block, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpV, lpHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## pixels8_l2(lpH, lpH, block, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, lpH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    uint8_t lpV[8*8];\
    uint8_t lpHV[8*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpV, block + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(lpHV, lpH, 8, 8);\
    OPNAME ## pixels8_l2(dst, lpV, lpHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[9*16];\
    uint8_t lpH[9*8];\
    copy_block9(block, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, block, 8, 16, 9);\
    put ## RND ## pixels8_l2(lpH, lpH, block + 1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, lpH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lpH[9*8];\
    put ## RND ## mpeg4_qpel8_h_lowpass(lpH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, lpH, stride, 8);\
}\
/* ------------------------ 16x16 entry points ------------------------ */\
/* Local buffers: block = 17 rows at stride 24 copied from src,          */\
/* lpH = 17 rows x 16 of h-lowpass output, lp/lpV/lpHV = 16x16 results.  */\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lp[16*16];\
    put ## RND ## mpeg4_qpel16_h_lowpass(lp, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, lp, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lp[16*16];\
    put ## RND ## mpeg4_qpel16_h_lowpass(lp, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src + 1, lp, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lp[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lp, block, 16, 24);\
    OPNAME ## pixels16_l2(dst, block, lp, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    copy_block17(block, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, block, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lp[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lp, block, 16, 24);\
    /* second input starts one row (24 bytes) into the copied block */\
    OPNAME ## pixels16_l2(dst, block + 24, lp, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpV[16*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpV, block, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l4(dst, block, lpH, lpV, lpHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    /* lpH is rewritten in place from itself and the copied block */\
    put ## RND ## pixels16_l2(lpH, lpH, block, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpH, lpHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpV[16*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpV, block + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l4(dst, block + 1, lpH, lpV, lpHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## pixels16_l2(lpH, lpH, block + 1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpH, lpHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpV[16*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpV, block, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l4(dst, block + 24, lpH + 16, lpV, lpHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## pixels16_l2(lpH, lpH, block, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpH + 16, lpHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpV[16*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpV, block + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    /* block + 25: one row (24) down and one sample right */\
    OPNAME ## pixels16_l4(dst, block + 25, lpH + 16, lpV, lpHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## pixels16_l2(lpH, lpH, block + 1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpH + 16, lpHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lpH[17*16];\
    uint8_t lpHV[16*16];\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpH, lpHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lpH[17*16];\
    uint8_t lpHV[16*16];\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpH + 16, lpHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpV[16*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpV, block, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpV, lpHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## pixels16_l2(lpH, lpH, block, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, lpH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    uint8_t lpV[16*16];\
    uint8_t lpHV[16*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpV, block + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(lpHV, lpH, 16, 16);\
    OPNAME ## pixels16_l2(dst, lpV, lpHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t block[17*24];\
    uint8_t lpH[17*16];\
    copy_block17(block, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, block, 16, 24, 17);\
    put ## RND ## pixels16_l2(lpH, lpH, block + 1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, lpH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t lpH[17*16];\
    put ## RND ## mpeg4_qpel16_h_lowpass(lpH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, lpH, stride, 16);\
}
44eb4951 2214
b3184779
MN
2215#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2216#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2217#define op_put(a, b) a = cm[((b) + 16)>>5]
2218#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2219
2220QPEL_MC(0, put_ , _ , op_put)
2221QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2222QPEL_MC(0, avg_ , _ , op_avg)
2223//QPEL_MC(1, avg_no_rnd , _ , op_avg)
2224#undef op_avg
2225#undef op_avg_no_rnd
2226#undef op_put
2227#undef op_put_no_rnd
44eb4951 2228
0da71265
MN
2229#if 1
2230#define H264_LOWPASS(OPNAME, OP, OP2) \
bb5705b9 2231static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
80e44bc3 2232 const int h=2;\
55fde95e 2233 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2234 int i;\
2235 for(i=0; i<h; i++)\
2236 {\
2237 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2238 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2239 dst+=dstStride;\
2240 src+=srcStride;\
2241 }\
2242}\
2243\
bb5705b9 2244static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
80e44bc3 2245 const int w=2;\
55fde95e 2246 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2247 int i;\
2248 for(i=0; i<w; i++)\
2249 {\
2250 const int srcB= src[-2*srcStride];\
2251 const int srcA= src[-1*srcStride];\
2252 const int src0= src[0 *srcStride];\
2253 const int src1= src[1 *srcStride];\
2254 const int src2= src[2 *srcStride];\
2255 const int src3= src[3 *srcStride];\
2256 const int src4= src[4 *srcStride];\
2257 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2258 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2259 dst++;\
2260 src++;\
2261 }\
2262}\
2263\
bb5705b9 2264static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
80e44bc3
MN
2265 const int h=2;\
2266 const int w=2;\
55fde95e 2267 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2268 int i;\
2269 src -= 2*srcStride;\
2270 for(i=0; i<h+5; i++)\
2271 {\
2272 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2273 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2274 tmp+=tmpStride;\
2275 src+=srcStride;\
2276 }\
2277 tmp -= tmpStride*(h+5-2);\
2278 for(i=0; i<w; i++)\
2279 {\
2280 const int tmpB= tmp[-2*tmpStride];\
2281 const int tmpA= tmp[-1*tmpStride];\
2282 const int tmp0= tmp[0 *tmpStride];\
2283 const int tmp1= tmp[1 *tmpStride];\
2284 const int tmp2= tmp[2 *tmpStride];\
2285 const int tmp3= tmp[3 *tmpStride];\
2286 const int tmp4= tmp[4 *tmpStride];\
2287 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2288 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2289 dst++;\
2290 tmp++;\
2291 }\
2292}\
0da71265
MN
2293static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2294 const int h=4;\
55fde95e 2295 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2296 int i;\
2297 for(i=0; i<h; i++)\
2298 {\
2299 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2300 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2301 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
2302 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
2303 dst+=dstStride;\
2304 src+=srcStride;\
2305 }\
2306}\
2307\
2308static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2309 const int w=4;\
55fde95e 2310 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2311 int i;\
2312 for(i=0; i<w; i++)\
2313 {\
2314 const int srcB= src[-2*srcStride];\
2315 const int srcA= src[-1*srcStride];\
2316 const int src0= src[0 *srcStride];\
2317 const int src1= src[1 *srcStride];\
2318 const int src2= src[2 *srcStride];\
2319 const int src3= src[3 *srcStride];\
2320 const int src4= src[4 *srcStride];\
2321 const int src5= src[5 *srcStride];\
2322 const int src6= src[6 *srcStride];\
2323 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2324 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2325 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2326 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2327 dst++;\
2328 src++;\
2329 }\
2330}\
2331\
2332static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2333 const int h=4;\
2334 const int w=4;\
55fde95e 2335 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2336 int i;\
2337 src -= 2*srcStride;\
2338 for(i=0; i<h+5; i++)\
2339 {\
2340 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2341 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2342 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
2343 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
2344 tmp+=tmpStride;\
2345 src+=srcStride;\
2346 }\
2347 tmp -= tmpStride*(h+5-2);\
2348 for(i=0; i<w; i++)\
2349 {\
2350 const int tmpB= tmp[-2*tmpStride];\
2351 const int tmpA= tmp[-1*tmpStride];\
2352 const int tmp0= tmp[0 *tmpStride];\
2353 const int tmp1= tmp[1 *tmpStride];\
2354 const int tmp2= tmp[2 *tmpStride];\
2355 const int tmp3= tmp[3 *tmpStride];\
2356 const int tmp4= tmp[4 *tmpStride];\
2357 const int tmp5= tmp[5 *tmpStride];\
2358 const int tmp6= tmp[6 *tmpStride];\
2359 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2360 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2361 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2362 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2363 dst++;\
2364 tmp++;\
2365 }\
2366}\
2367\
2368static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2369 const int h=8;\
55fde95e 2370 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2371 int i;\
2372 for(i=0; i<h; i++)\
2373 {\
2374 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
2375 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
2376 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
2377 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
2378 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
2379 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
2380 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
2381 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
2382 dst+=dstStride;\
2383 src+=srcStride;\
2384 }\
2385}\
2386\
2387static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2388 const int w=8;\
55fde95e 2389 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2390 int i;\
2391 for(i=0; i<w; i++)\
2392 {\
2393 const int srcB= src[-2*srcStride];\
2394 const int srcA= src[-1*srcStride];\
2395 const int src0= src[0 *srcStride];\
2396 const int src1= src[1 *srcStride];\
2397 const int src2= src[2 *srcStride];\
2398 const int src3= src[3 *srcStride];\
2399 const int src4= src[4 *srcStride];\
2400 const int src5= src[5 *srcStride];\
2401 const int src6= src[6 *srcStride];\
2402 const int src7= src[7 *srcStride];\
2403 const int src8= src[8 *srcStride];\
2404 const int src9= src[9 *srcStride];\
2405 const int src10=src[10*srcStride];\
2406 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2407 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2408 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2409 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2410 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
2411 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
2412 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
2413 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
2414 dst++;\
2415 src++;\
2416 }\
2417}\
2418\
2419static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2420 const int h=8;\
2421 const int w=8;\
55fde95e 2422 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2423 int i;\
2424 src -= 2*srcStride;\
2425 for(i=0; i<h+5; i++)\
2426 {\
2427 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
2428 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
2429 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
2430 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
2431 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
2432 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
2433 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
2434 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
2435 tmp+=tmpStride;\
2436 src+=srcStride;\
2437 }\
2438 tmp -= tmpStride*(h+5-2);\
2439 for(i=0; i<w; i++)\
2440 {\
2441 const int tmpB= tmp[-2*tmpStride];\
2442 const int tmpA= tmp[-1*tmpStride];\
2443 const int tmp0= tmp[0 *tmpStride];\
2444 const int tmp1= tmp[1 *tmpStride];\
2445 const int tmp2= tmp[2 *tmpStride];\
2446 const int tmp3= tmp[3 *tmpStride];\
2447 const int tmp4= tmp[4 *tmpStride];\
2448 const int tmp5= tmp[5 *tmpStride];\
2449 const int tmp6= tmp[6 *tmpStride];\
2450 const int tmp7= tmp[7 *tmpStride];\
2451 const int tmp8= tmp[8 *tmpStride];\
2452 const int tmp9= tmp[9 *tmpStride];\
2453 const int tmp10=tmp[10*tmpStride];\
2454 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2455 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2456 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2457 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2458 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
2459 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
2460 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
2461 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
2462 dst++;\
2463 tmp++;\
2464 }\
2465}\
2466\
2467static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2468 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2469 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2470 src += 8*srcStride;\
2471 dst += 8*dstStride;\
2472 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2473 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2474}\
2475\
2476static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2477 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2478 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2479 src += 8*srcStride;\
2480 dst += 8*dstStride;\
2481 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2482 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2483}\
2484\
2485static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2486 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2487 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2488 src += 8*srcStride;\
0da71265
MN
2489 dst += 8*dstStride;\
2490 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2491 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2492}\
2493
/**
 * Generate the sixteen C quarter-pel motion compensation functions
 * OPNAME##h264_qpel##SIZE##_mcXY_c for one block size.
 *
 * X (0..3) selects the horizontal and Y (0..3) the vertical quarter-sample
 * position: the mcX0 variants only use the horizontal lowpass, the mc0Y
 * variants only the vertical one, mc22 uses the combined hv lowpass, and
 * the remaining positions blend two intermediate planes with
 * OPNAME##pixels##SIZE##_l2().
 *
 * Intermediate planes are always produced with the put_ lowpass functions;
 * only the final store into dst goes through OPNAME.
 *
 * For the vertical filter, "full" receives SIZE+5 rows copied from two rows
 * above src, so full_mid (= full + 2 rows) lines up with src and the filter
 * can read the rows it needs above and below the block.
 *
 * @param OPNAME prefix of the store operation (instantiated in this file
 *               with put_ and avg_)
 * @param SIZE   block width and height in samples
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}

2631#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2632//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2633#define op_put(a, b) a = cm[((b) + 16)>>5]
2634#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2635#define op2_put(a, b) a = cm[((b) + 512)>>10]
2636
2637H264_LOWPASS(put_ , op_put, op2_put)
2638H264_LOWPASS(avg_ , op_avg, op2_avg)
80e44bc3 2639H264_MC(put_, 2)
0da71265
MN
2640H264_MC(put_, 4)
2641H264_MC(put_, 8)
2642H264_MC(put_, 16)
2643H264_MC(avg_, 4)
2644H264_MC(avg_, 8)
2645H264_MC(avg_, 16)
2646
2647#undef op_avg
2648#undef op_put
2649#undef op2_avg
2650#undef op2_put
2651#endif
2652
f66e4f5f
RD
/* Weight one sample of "block" in place. */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
/* Blend src[x] into dst[x] using the two weights. */
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/**
 * Generate weight_h264_pixels<W>x<H>_c() and biweight_h264_pixels<W>x<H>_c().
 *
 * weight_*:   block[x] = clip((block[x]*weight + offset') >> log2_denom),
 *             where offset' is offset scaled by 2^log2_denom plus the
 *             rounding term 2^(log2_denom-1) (no rounding term when
 *             log2_denom is 0).
 * biweight_*: dst[x] = clip((src[x]*weights + dst[x]*weightd + offset')
 *                           >> (log2_denom+1)),
 *             where offset' = ((offset + 1) | 1) * 2^log2_denom.
 *
 * The offset is scaled with a multiplication rather than a left shift
 * because it may be negative, and left-shifting a negative int is undefined
 * behaviour in C.
 *
 * @param W block width in samples (instantiated below with 2, 4, 8, 16)
 * @param H block height in rows
 */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int x, y; \
    offset *= 1 << log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        for(x=0; x<W; x++) \
            op_scale1(x); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int x, y; \
    offset = ((offset + 1) | 1) * (1 << log2_denom); \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        for(x=0; x<W; x++) \
            op_scale2(x); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
2722
1457ab52 2723static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
55fde95e 2724 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
2725 int i;
2726
2727 for(i=0; i<h; i++){
2728 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2729 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2730 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2731 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2732 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2733 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2734 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2735 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2736 dst+=dstStride;
115329f1 2737 src+=srcStride;
1457ab52
MN
2738 }
2739}
2740
#if CONFIG_CAVS_DECODER
/* AVS specific */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

/* Full-sample (mc00) positions: forward to the plain pixel put/avg helpers. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
b482e2d1 2758
da00b525 2759void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
bf4f19dc 2760
#if CONFIG_VC1_DECODER
/* VC-1 specific */
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

/*
 * Full-sample (mc00) positions: forward to the 8x8 pixel put/avg helpers.
 * The rnd argument is accepted but not used here.
 */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    avg_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER */
64db55ae 2772
9abc7e0f 2773void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
9abc7e0f 2774
c6b237da 2775/* H264 specific */
edecaff8 2776void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
c6b237da 2777
b250f9c6 2778#if CONFIG_RV30_DECODER
6beb8b26
KS
2779void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
2780#endif /* CONFIG_RV30_DECODER */
2781
#if CONFIG_RV40_DECODER
/* The RV40 mc33 position is handled by the generic xy2 pixel helpers. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}

void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
#endif /* CONFIG_RV40_DECODER */
2798
1457ab52 2799static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
55fde95e 2800 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
2801 int i;
2802
2803 for(i=0; i<w; i++){
2804 const int src_1= src[ -srcStride];
2805 const int src0 = src[0 ];
2806 const int src1 = src[ srcStride];
2807 const int src2 = src[2*srcStride];
2808 const int src3 = src[3*srcStride];
2809 const int src4 = src[4*srcStride];
2810 const int src5 = src[5*srcStride];
2811 const int src6 = src[6*srcStride];
2812 const int src7 = src[7*srcStride];
2813 const int src8 = src[8*srcStride];
2814 const int src9 = src[9*srcStride];
2815 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2816 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2817 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2818 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2819 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2820 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2821 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2822 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2823 src++;
2824 dst++;
2825 }
2826}
2827
/*
 * 8x8 "mspel" put functions built on the wmv2_mspel8 lowpass filters.
 * In mcXY, X is the horizontal and Y the vertical position: X/Y == 2 is the
 * filtered half-sample position, and X == 1 or 3 blends the filtered plane
 * with the sample on its left (src) or right (src+1) via put_pixels8_l2().
 */

static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

/*
 * For the positions below, halfH holds 11 horizontally filtered rows
 * starting one row above src, so halfH+8 lines up with src and the vertical
 * filter has the extra rows it reads above and below the block.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2875
332f9ac4 2876static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
49fb20cb 2877 if(CONFIG_ANY_H263) {
332f9ac4
MN
2878 int x;
2879 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 2880
332f9ac4
MN
2881 for(x=0; x<8; x++){
2882 int d1, d2, ad1;
2883 int p0= src[x-2*stride];
2884 int p1= src[x-1*stride];
2885 int p2= src[x+0*stride];
2886 int p3= src[x+1*stride];
2887 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2888
2889 if (d<-2*strength) d1= 0;
2890 else if(d<- strength) d1=-2*strength - d;
2891 else if(d< strength) d1= d;
2892 else if(d< 2*strength) d1= 2*strength - d;
2893 else d1= 0;
115329f1 2894
332f9ac4
MN
2895 p1 += d1;
2896 p2 -= d1;
2897 if(p1&256) p1= ~(p1>>31);
2898 if(p2&256) p2= ~(p2>>31);
115329f1 2899
332f9ac4
MN
2900 src[x-1*stride] = p1;
2901 src[x+0*stride] = p2;
2902
c26abfa5 2903 ad1= FFABS(d1)>>1;
115329f1 2904
f66e4f5f 2905 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 2906
332f9ac4
MN
2907 src[x-2*stride] = p0 - d2;
2908 src[x+ stride] = p3 + d2;
2909 }
73f51a4d 2910 }
332f9ac4
MN
2911}
2912
2913static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
49fb20cb 2914 if(CONFIG_ANY_H263) {
332f9ac4
MN
2915 int y;
2916 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 2917
332f9ac4
MN
2918 for(y=0; y<8; y++){
2919 int d1, d2, ad1;
2920 int p0= src[y*stride-2];
2921 int p1= src[y*stride-1];
2922 int p2= src[y*stride+0];
2923 int p3= src[y*stride+1];
2924 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2925
2926 if (d<-2*strength) d1= 0;
2927 else if(d<- strength) d1=-2*strength - d;
2928 else if(d< strength) d1= d;
2929 else if(d< 2*strength) d1= 2*strength - d;
2930 else d1= 0;
115329f1 2931
332f9ac4
MN
2932 p1 += d1;
2933 p2 -= d1;
2934 if(p1&256) p1= ~(p1>>31);
2935 if(p2&256) p2= ~(p2>>31);
115329f1 2936
332f9ac4
MN
2937 src[y*stride-1] = p1;
2938 src[y*stride+0] = p2;
2939
c26abfa5 2940 ad1= FFABS(d1)>>1;
115329f1 2941
f66e4f5f 2942 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 2943
332f9ac4
MN
2944 src[y*stride-2] = p0 - d2;
2945 src[y*stride+1] = p3 + d2;
2946 }
73f51a4d 2947 }
332f9ac4 2948}
1457ab52 2949
fdbbf2e0
MN
/**
 * H.261 loop filter: separable (1, 2, 1)/4 smoothing of one 8x8 block,
 * in place.
 *
 * The vertical pass goes into temp[] scaled by 4; the first and last rows
 * are not filtered vertically (they are only scaled). The horizontal pass
 * then filters columns 1..6 (total divisor 16) and merely rescales columns
 * 0 and 7 (divisor 4), so border samples are never filtered across the
 * block boundary.
 *
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between rows of src
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    /* rows 0 and 7: no vertical neighbours inside the block */
    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    /* vertical (1, 2, 1) pass for the inner rows */
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    /* horizontal (1, 2, 1) pass with rounding, written back to src */
    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
2976
/**
 * H.264 normal (non-intra) luma deblocking of one 16-sample edge.
 *
 * The edge is handled as four groups of four lines; group i uses the
 * clipping value tc0[i] and is skipped entirely when tc0[i] is negative.
 * A line is filtered only if |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta. p1 and q1 are additionally corrected (clipped to
 * +-tc0[i]) when |p2-p0| < beta or |q2-q0| < beta respectively, and each
 * such correction widens the p0/q0 clipping range tc by one.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step across the edge (from p to q samples)
 * @param ystride step along the edge (to the next line)
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the per-side sample differences
 * @param tc0     four clipping values, one per group of four lines
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                /* (q0 - p0) may be negative, so scale by multiplying:
                 * left-shifting a negative int is undefined behaviour */
                i_delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Luma deblocking of a horizontal edge: step across it by stride, along it by 1. */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Luma deblocking of a vertical edge: step across it by 1, along it by stride. */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
3025
712ca84c
JGG
/**
 * H.264 intra (strong) luma deblocking of one 16-sample edge.
 *
 * A line is filtered only if |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta. If in addition |p0-q0| < (alpha >> 2) + 2, each side
 * whose |p2-p0| (resp. |q2-q0|) is below beta gets its three nearest
 * samples rewritten; otherwise only p0 (resp. q0) is replaced.
 * All filtered values are computed from the samples read at the top of the
 * iteration, not from values already written back.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step across the edge (from p to q samples)
 * @param ystride step along the edge (to the next line)
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the per-side sample differences
 */
static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 16; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                if( FFABS( p2 - p0 ) < beta)
                {
                    const int p3 = pix[-4*xstride];
                    /* p0', p1', p2' */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( FFABS( q2 - q0 ) < beta)
                {
                    const int q3 = pix[3*xstride];
                    /* q0', q1', q2' */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }else{
                /* p0', q0' */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
/* Intra luma deblocking of a horizontal edge: step across it by stride, along it by 1. */
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
}
/* Intra luma deblocking of a vertical edge: step across it by 1, along it by stride. */
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
}
3082
/**
 * H.264 normal (non-intra) chroma deblocking of one 8-sample edge.
 *
 * The edge is handled as four groups of two lines; group i is clipped to
 * +-tc0[i] and skipped when tc0[i] <= 0. A line is filtered only if
 * |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta; only p0 and q0 are
 * modified.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step across the edge (from p to q samples)
 * @param ystride step along the edge (to the next line)
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the per-side sample differences
 * @param tc0     four clipping values, one per group of two lines
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                /* (q0 - p0) may be negative, so scale by multiplying:
                 * left-shifting a negative int is undefined behaviour */
                int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Chroma deblocking of a horizontal edge: step across it by stride, along it by 1. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Chroma deblocking of a vertical edge: step across it by 1, along it by stride. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
3119
5cf08f23
LM
/**
 * H.264 intra chroma deblocking of one 8-sample edge.
 *
 * A line is filtered only if |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta; p0 and q0 are then replaced by a rounded weighted average
 * of themselves, their own-side neighbour (weight 2) and the opposite
 * side's second sample.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step across the edge (from p to q samples)
 * @param ystride step along the edge (to the next line)
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the per-side sample differences
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++ ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
        pix += ystride;
    }
}
/* Intra chroma deblocking of a horizontal edge: step across it by stride, along it by 1. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
/* Intra chroma deblocking of a vertical edge: step across it by 1, along it by stride. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
3147
/**
 * Sum of absolute differences between two blocks 16 samples wide.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return sum over all h*16 samples of |pix1 - pix2|
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i, x;

    s = 0;
    for(i=0;i<h;i++) {
        for(x=0;x<16;x++)
            s += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
3175
/**
 * Sum of absolute differences, 16 samples wide, against the horizontal
 * half-sample interpolation of pix2: each pix1 sample is compared with
 * avg2() of the pix2 sample at the same column and its right neighbour.
 * Reads 17 samples per row of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i, x;

    s = 0;
    for(i=0;i<h;i++) {
        for(x=0;x<16;x++)
            s += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
3203
/**
 * Sum of absolute differences, 16 samples wide, against the vertical
 * half-sample interpolation of pix2: each pix1 sample is compared with
 * avg2() of the pix2 sample at the same position and the one a row below.
 * Reads h+1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i, x;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        for(x=0;x<16;x++)
            s += abs(pix1[x] - avg2(pix2[x], pix3[x]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
3233
/**
 * Sum of absolute differences, 16 samples wide, against the diagonal
 * half-sample interpolation of pix2: each pix1 sample is compared with
 * avg4() of the 2x2 pix2 neighbourhood whose top-left corner is at the same
 * position. Reads 17 samples per row and h+1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i, x;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        for(x=0;x<16;x++)
            s += abs(pix1[x] - avg4(pix2[x], pix2[x+1], pix3[x], pix3[x+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
3263
/**
 * Plain sum of absolute differences between two 8-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, applied to both blocks
 * @param h         number of rows to accumulate
 * @return the accumulated absolute difference (0 when h <= 0)
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint8_t *a = pix1;
    const uint8_t *b = pix2;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(a[col] - b[col]);

        a += line_size;
        b += line_size;
    }
    return sad;
}
3283
/**
 * Sum of absolute differences over an 8-pixel-wide block, where every
 * reference sample is first combined with its right-hand neighbour
 * through avg2().
 *
 * @param v         unused
 * @param pix1      block being measured
 * @param pix2      reference block; 9 columns of each row are read
 * @param line_size offset between consecutive rows, applied to both blocks
 * @param h         number of rows to accumulate
 * @return the accumulated absolute difference
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint8_t *cur = pix1;
    const uint8_t *ref = pix2;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(cur[col] - avg2(ref[col], ref[col + 1]));

        cur += line_size;
        ref += line_size;
    }
    return sad;
}
3303
/**
 * Sum of absolute differences over an 8-pixel-wide block, where every
 * reference sample is first combined with the sample directly below it
 * through avg2().
 *
 * @param v         unused
 * @param pix1      block being measured
 * @param pix2      reference block; rows 0..h (inclusive) are read
 * @param line_size offset between consecutive rows, applied to both blocks
 * @param h         number of rows to accumulate
 * @return the accumulated absolute difference
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint8_t *cur   = pix1;
    const uint8_t *upper = pix2;
    const uint8_t *lower = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(cur[col] - avg2(upper[col], lower[col]));

        cur   += line_size;
        upper += line_size;
        lower += line_size;
    }
    return sad;
}
3325
/**
 * Sum of absolute differences over an 8-pixel-wide block, where every
 * reference sample is first combined with its right, lower and lower-right
 * neighbours through avg4().
 *
 * @param v         unused
 * @param pix1      block being measured
 * @param pix2      reference block; 9 columns of rows 0..h (inclusive) are read
 * @param line_size offset between consecutive rows, applied to both blocks
 * @param h         number of rows to accumulate
 * @return the accumulated absolute difference
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const uint8_t *cur   = pix1;
    const uint8_t *upper = pix2;
    const uint8_t *lower = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(cur[col] - avg4(upper[col], upper[col + 1],
                                       lower[col], lower[col + 1]));

        cur   += line_size;
        upper += line_size;
        lower += line_size;
    }
    return sad;
}
3347
bf4e3bd2
MR
3348static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3349 MpegEncContext *c = v;
e6a2ac34
MN
3350 int score1=0;
3351 int score2=0;
3352 int x,y;
d4c5d2ad 3353
e6a2ac34
MN
3354 for(y=0; y<h; y++){
3355 for(x=0; x<16; x++){
3356 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3357 }
3358 if(y+1<h){
3359 for(x=0; x<15; x++){
c26abfa5 3360 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 3361 - s1[x+1] + s1[x+1+stride])
c26abfa5 3362 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
3363 - s2[x+1] + s2[x+1+stride]);
3364 }
3365 }
3366 s1+= stride;
3367 s2+= stride;
3368 }
d4c5d2ad 3369
c26abfa5
DB
3370 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3371 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
3372}
3373
bf4e3bd2
MR
3374static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3375 MpegEncContext *c = v;
e6a2ac34
MN
3376 int score1=0;
3377 int score2=0;
3378 int x,y;
115329f1 3379
e6a2ac34
MN
3380 for(y=0; y<h; y++){
3381 for(x=0; x<8; x++){
3382 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3383 }
3384 if(y+1<h){
3385 for(x=0; x<7; x++){
c26abfa5 3386 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 3387 - s1[x+1] + s1[x+1+stride])
c26abfa5 3388 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
3389 - s2[x+1] + s2[x+1+stride]);
3390 }
3391 }
3392 s1+= stride;
3393 s2+= stride;
3394 }
115329f1 3395
c26abfa5
DB
3396 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3397 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
3398}
3399
364a1797
MN
3400static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
3401 int i;
3402 unsigned int sum=0;
3403
3404 for(i=0; i<8*8; i++){
3405 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
3406 int w= weight[i];
3407 b>>= RECON_SHIFT;
3408 assert(-512<b && b<512);
3409
3410 sum += (w*b)*(w*b)>>4;
3411 }
3412 return sum>>2;
3413}
3414
3415static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
3416 int i;
3417
3418 for(i=0; i<8*8; i++){
3419 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115329f1 3420 }
364a1797
MN
3421}
3422
a9badb51
MN
3423/**
3424 * permutes an 8x8 block.
2a5700de 3425 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
3426 * @param permutation the permutation vector
3427 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
115329f1 3428 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2a5700de 3429 * (inverse) permutated to scantable order!
a9badb51 3430 */
0c1a9eda 3431void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 3432{
7801d21d 3433 int i;
477ab036 3434 DCTELEM temp[64];
115329f1 3435
7801d21d 3436 if(last<=0) return;
90b5b51e 3437 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
d962f6fd 3438
7801d21d
MN
3439 for(i=0; i<=last; i++){
3440 const int j= scantable[i];
3441 temp[j]= block[j];
3442 block[j]=0;
3443 }
115329f1 3444
7801d21d
MN
3445 for(i=0; i<=last; i++){
3446 const int j= scantable[i];
3447 const int perm_j= permutation[j];
3448 block[perm_j]= temp[j];
3449 }
d962f6fd 3450}
e0eac44e 3451
622348f9
MN
/**
 * Comparison function that ignores all of its arguments and always
 * reports a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
    return 0;
}
3455
3456void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
3457 int i;
115329f1 3458
3899eb2f 3459 memset(cmp, 0, sizeof(void*)*6);
115329f1 3460
3899eb2f 3461 for(i=0; i<6; i++){
622348f9
MN
3462 switch(type&0xFF){
3463 case FF_CMP_SAD:
3464 cmp[i]= c->sad[i];
3465 break;
3466 case FF_CMP_SATD:
3467 cmp[i]= c->hadamard8_diff[i];
3468 break;
3469 case FF_CMP_SSE:
3470 cmp[i]= c->sse[i];
3471 break;
3472 case FF_CMP_DCT:
3473 cmp[i]= c->dct_sad[i];
3474 break;
27c61ac5
MN
3475 case FF_CMP_DCT264:
3476 cmp[i]= c->dct264_sad[i];
3477 break;
0fd6aea1
MN
3478 case FF_CMP_DCTMAX:
3479 cmp[i]= c->dct_max[i];
3480 break;
622348f9
MN
3481 case FF_CMP_PSNR:
3482 cmp[i]= c->quant_psnr[i];
3483 break;
3484 case FF_CMP_BIT:
3485 cmp[i]= c->bit[i];
3486 break;
3487 case FF_CMP_RD:
3488 cmp[i]= c->rd[i];
3489 break;
3490 case FF_CMP_VSAD:
3491 cmp[i]= c->vsad[i];
3492 break;
3493 case FF_CMP_VSSE:
3494 cmp[i]= c->vsse[i];
3495 break;
3496 case FF_CMP_ZERO:
3497 cmp[i]= zero_cmp;
3498 break;
e6a2ac34
MN
3499 case FF_CMP_NSSE:
3500 cmp[i]= c->nsse[i];
3501 break;
b250f9c6 3502#if CONFIG_SNOW_ENCODER
26efc54e
MN
3503 case FF_CMP_W53:
3504 cmp[i]= c->w53[i];
3505 break;
3506 case FF_CMP_W97:
3507 cmp[i]= c->w97[i];
3508 break;
3a6fc8fa 3509#endif
622348f9
MN
3510 default:
3511 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
3512 }
3513 }
3514}
3515
5fecfb7d
LM
3516static void clear_block_c(DCTELEM *block)
3517{
3518 memset(block, 0, sizeof(DCTELEM)*64);
3519}
3520
2a5700de
MN
3521/**
3522 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
3523 */
eb4b3dd3 3524static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
3525{
3526 memset(blocks, 0, sizeof(DCTELEM)*6*64);
3527}
3528
11f18faf 3529static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
469bd7b1
LM
3530 long i;
3531 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
3532 long a = *(long*)(src+i);
3533 long b = *(long*)(dst+i);
3534 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
11f18faf
MN
3535 }
3536 for(; i<w; i++)
3537 dst[i+0] += src[i+0];
3538}
3539
4a9ca0a2 3540static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
469bd7b1 3541 long i;
4a9ca0a2
LM
3542 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
3543 long a = *(long*)(src1+i);
3544 long b = *(long*)(src2+i);
469bd7b1 3545 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
4a9ca0a2
LM
3546 }
3547 for(; i<w; i++)
3548 dst[i] = src1[i]+src2[i];
3549}
3550