Move some VC1 dsp prototypes to dsputil.h; they are defined in dsputil.c
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
406792e7 3 * Copyright (c) 2000, 2001 Fabrice Bellard
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
de6d9b64 5 *
7b94177e
DB
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7 *
b78e7197
DB
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
ff4ec49e
FB
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
b78e7197 13 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 14 *
b78e7197 15 * FFmpeg is distributed in the hope that it will be useful,
de6d9b64 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
de6d9b64 19 *
ff4ec49e 20 * You should have received a copy of the GNU Lesser General Public
b78e7197 21 * License along with FFmpeg; if not, write to the Free Software
5509bffa 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 23 */
115329f1 24
983e3246 25/**
bad5537e 26 * @file libavcodec/dsputil.c
983e3246
MN
27 * DSP utils
28 */
115329f1 29
de6d9b64
FB
30#include "avcodec.h"
31#include "dsputil.h"
b0368839 32#include "simple_idct.h"
65e4c8c9 33#include "faandct.h"
6f08c541 34#include "faanidct.h"
199436b9 35#include "mathops.h"
059715a4 36#include "snow.h"
af818f7a
DB
37#include "mpegvideo.h"
38#include "config.h"
3da11804
MR
39#include "lpc.h"
40#include "ac3dec.h"
41#include "vorbis.h"
42#include "png.h"
5596c60c 43
88730be6
MR
44/* snow.c */
45void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
46
28245435
PR
47/* eaidct.c */
48void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
49
342c7dfd
KS
50/* binkidct.c */
51void ff_bink_idct_c (DCTELEM *block);
52void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block);
53void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
54
/* Shared lookup tables, zero-initialised here; presumably filled in by the DSP
 * init code, which is not visible in this part of the file -- TODO confirm.
 * Users in this file index them through an offset base pointer
 * (ff_cropTbl + MAX_NEG_CROP, ff_squareTbl + 256) so that negative indices
 * stay inside the arrays. */
55fde95e 55uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
1d503957 56uint32_t ff_squareTbl[512] = {0, };
de6d9b64 57
917f55cc
LM
58// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
59#define pb_7f (~0UL/255 * 0x7f)
60#define pb_80 (~0UL/255 * 0x80)
469bd7b1 61
/* Zigzag scan order for an 8x8 block: entry n gives the position of the n-th
 * coefficient in scan order (presumably the raster index 8*row + column --
 * TODO confirm against the users of this table). The 64 entries are a
 * permutation of 0..63. */
0c1a9eda 62const uint8_t ff_zigzag_direct[64] = {
2ad1516a
MN
63 0, 1, 8, 16, 9, 2, 3, 10,
64 17, 24, 32, 25, 18, 11, 4, 5,
e0eac44e 65 12, 19, 26, 33, 40, 48, 41, 34,
2ad1516a 66 27, 20, 13, 6, 7, 14, 21, 28,
e0eac44e
FB
67 35, 42, 49, 56, 57, 50, 43, 36,
68 29, 22, 15, 23, 30, 37, 44, 51,
69 58, 59, 52, 45, 38, 31, 39, 46,
70 53, 60, 61, 54, 47, 55, 62, 63
71};
72
10acc479
RS
73/* Specific zigzag scan for 248 idct. NOTE that unlike the
74 specification, we interleave the fields */
75const uint8_t ff_zigzag248_direct[64] = {
76 0, 8, 1, 9, 16, 24, 2, 10,
77 17, 25, 32, 40, 48, 56, 33, 41,
78 18, 26, 3, 11, 4, 12, 19, 27,
79 34, 42, 49, 57, 50, 58, 35, 43,
80 20, 28, 5, 13, 6, 14, 21, 29,
81 36, 44, 51, 59, 52, 60, 37, 45,
82 22, 30, 7, 15, 23, 31, 38, 46,
83 53, 61, 54, 62, 39, 47, 55, 63,
84};
85
2f349de2 86/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84dc2d8a 87DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
2f349de2 88
/* Alternate scan order for an 8x8 block that walks mostly along rows: entry n
 * gives the position (0..63) of the n-th coefficient in scan order.
 * NOTE(review): positions are presumably raster indices (8*row + column) --
 * confirm against the users of this table. */
0c1a9eda 89const uint8_t ff_alternate_horizontal_scan[64] = {
115329f1 90 0, 1, 2, 3, 8, 9, 16, 17,
e0eac44e 91 10, 11, 4, 5, 6, 7, 15, 14,
115329f1 92 13, 12, 19, 18, 24, 25, 32, 33,
e0eac44e 93 26, 27, 20, 21, 22, 23, 28, 29,
115329f1 94 30, 31, 34, 35, 40, 41, 48, 49,
e0eac44e 95 42, 43, 36, 37, 38, 39, 44, 45,
115329f1 96 46, 47, 50, 51, 56, 57, 58, 59,
e0eac44e
FB
97 52, 53, 54, 55, 60, 61, 62, 63,
98};
99
/* Alternate scan order for an 8x8 block that walks mostly down columns: entry
 * n gives the position (0..63) of the n-th coefficient in scan order.
 * NOTE(review): positions are presumably raster indices (8*row + column) --
 * confirm against the users of this table. */
0c1a9eda 100const uint8_t ff_alternate_vertical_scan[64] = {
115329f1 101 0, 8, 16, 24, 1, 9, 2, 10,
e0eac44e 102 17, 25, 32, 40, 48, 56, 57, 49,
115329f1 103 41, 33, 26, 18, 3, 11, 4, 12,
e0eac44e 104 19, 27, 34, 42, 50, 58, 35, 43,
115329f1 105 51, 59, 20, 28, 5, 13, 6, 14,
e0eac44e 106 21, 29, 36, 44, 52, 60, 37, 45,
115329f1 107 53, 61, 22, 30, 7, 15, 23, 31,
e0eac44e
FB
108 38, 46, 54, 62, 39, 47, 55, 63,
109};
110
1a918c08
LM
111/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
112 * for a>16909558, is an overestimate by less than 1 part in 1<<24 */
113const uint32_t ff_inverse[257]={
115329f1
DB
114 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
115 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
116 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
117 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
118 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
119 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
120 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
121 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
122 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
123 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
124 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
125 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
126 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
127 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
128 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
129 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
130 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
131 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
132 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
133 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
134 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
135 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
136 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
137 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
138 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
139 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
140 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
141 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
142 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
143 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
144 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
2f349de2 145 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
1a918c08 146 16777216
2f349de2
MN
147};
148
b0368839
MN
149/* Input permutation for the simple_idct_mmx */
150static const uint8_t simple_mmx_permutation[64]={
bb270c08
DB
151 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
152 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
153 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
154 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
155 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
156 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
157 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
158 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
b0368839
MN
159};
160
0e956ba2
AS
/* Eight-entry permutation of 0..7; judging by the name it reorders rows for
 * the SSE2 IDCT -- its use is not visible in this part of the file, TODO confirm. */
161static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
162
4c79b95c
AJ
163void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
164 int i;
165 int end;
166
167 st->scantable= src_scantable;
168
169 for(i=0; i<64; i++){
170 int j;
171 j = src_scantable[i];
172 st->permutated[i] = permutation[j];
b250f9c6 173#if ARCH_PPC
4c79b95c
AJ
174 st->inverse[j] = i;
175#endif
176 }
177
178 end=-1;
179 for(i=0; i<64; i++){
180 int j;
181 j = st->permutated[i];
182 if(j>end) end=j;
183 st->raster_end[i]= end;
184 }
185}
186
/**
 * Sums all samples of a 16x16 block.
 *
 * @param pix        top-left sample of the block
 * @param line_size  distance in bytes between two vertically adjacent samples
 * @return the sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
208
0c1a9eda 209static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
210{
211 int s, i, j;
1d503957 212 uint32_t *sq = ff_squareTbl + 256;
3aa102be
MN
213
214 s = 0;
215 for (i = 0; i < 16; i++) {
bb270c08 216 for (j = 0; j < 16; j += 8) {
2a006cd3 217#if 0
bb270c08
DB
218 s += sq[pix[0]];
219 s += sq[pix[1]];
220 s += sq[pix[2]];
221 s += sq[pix[3]];
222 s += sq[pix[4]];
223 s += sq[pix[5]];
224 s += sq[pix[6]];
225 s += sq[pix[7]];
2a006cd3
FL
226#else
227#if LONG_MAX > 2147483647
bb270c08
DB
228 register uint64_t x=*(uint64_t*)pix;
229 s += sq[x&0xff];
230 s += sq[(x>>8)&0xff];
231 s += sq[(x>>16)&0xff];
232 s += sq[(x>>24)&0xff];
2a006cd3
FL
233 s += sq[(x>>32)&0xff];
234 s += sq[(x>>40)&0xff];
235 s += sq[(x>>48)&0xff];
236 s += sq[(x>>56)&0xff];
237#else
bb270c08
DB
238 register uint32_t x=*(uint32_t*)pix;
239 s += sq[x&0xff];
240 s += sq[(x>>8)&0xff];
241 s += sq[(x>>16)&0xff];
242 s += sq[(x>>24)&0xff];
2a006cd3
FL
243 x=*(uint32_t*)(pix+4);
244 s += sq[x&0xff];
245 s += sq[(x>>8)&0xff];
246 s += sq[(x>>16)&0xff];
247 s += sq[(x>>24)&0xff];
248#endif
249#endif
bb270c08
DB
250 pix += 8;
251 }
252 pix += line_size - 16;
3aa102be
MN
253 }
254 return s;
255}
256
/**
 * Applies bswap_32() to every word of a buffer.
 *
 * @param dst  destination, w words
 * @param src  source, w words
 * @param w    number of 32-bit words to convert; nothing is done for w <= 0
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int n;

    /* Words are handled in ascending order, one at a time. */
    for (n = 0; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
3aa102be 274
26efc54e
MN
275static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
276{
277 int s, i;
1d503957 278 uint32_t *sq = ff_squareTbl + 256;
26efc54e
MN
279
280 s = 0;
281 for (i = 0; i < h; i++) {
282 s += sq[pix1[0] - pix2[0]];
283 s += sq[pix1[1] - pix2[1]];
284 s += sq[pix1[2] - pix2[2]];
285 s += sq[pix1[3] - pix2[3]];
286 pix1 += line_size;
287 pix2 += line_size;
288 }
289 return s;
290}
291
bb198e19 292static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
293{
294 int s, i;
1d503957 295 uint32_t *sq = ff_squareTbl + 256;
1457ab52
MN
296
297 s = 0;
bb198e19 298 for (i = 0; i < h; i++) {
1457ab52
MN
299 s += sq[pix1[0] - pix2[0]];
300 s += sq[pix1[1] - pix2[1]];
301 s += sq[pix1[2] - pix2[2]];
302 s += sq[pix1[3] - pix2[3]];
303 s += sq[pix1[4] - pix2[4]];
304 s += sq[pix1[5] - pix2[5]];
305 s += sq[pix1[6] - pix2[6]];
306 s += sq[pix1[7] - pix2[7]];
307 pix1 += line_size;
308 pix2 += line_size;
309 }
310 return s;
311}
312
bb198e19 313static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 314{
6b026927 315 int s, i;
1d503957 316 uint32_t *sq = ff_squareTbl + 256;
9c76bd48
BF
317
318 s = 0;
bb198e19 319 for (i = 0; i < h; i++) {
6b026927
FH
320 s += sq[pix1[ 0] - pix2[ 0]];
321 s += sq[pix1[ 1] - pix2[ 1]];
322 s += sq[pix1[ 2] - pix2[ 2]];
323 s += sq[pix1[ 3] - pix2[ 3]];
324 s += sq[pix1[ 4] - pix2[ 4]];
325 s += sq[pix1[ 5] - pix2[ 5]];
326 s += sq[pix1[ 6] - pix2[ 6]];
327 s += sq[pix1[ 7] - pix2[ 7]];
328 s += sq[pix1[ 8] - pix2[ 8]];
329 s += sq[pix1[ 9] - pix2[ 9]];
330 s += sq[pix1[10] - pix2[10]];
331 s += sq[pix1[11] - pix2[11]];
332 s += sq[pix1[12] - pix2[12]];
333 s += sq[pix1[13] - pix2[13]];
334 s += sq[pix1[14] - pix2[14]];
335 s += sq[pix1[15] - pix2[15]];
2a006cd3 336
6b026927
FH
337 pix1 += line_size;
338 pix2 += line_size;
9c76bd48
BF
339 }
340 return s;
341}
342
26efc54e 343
b250f9c6 344#if CONFIG_SNOW_ENCODER //dwt is in snow.c
/**
 * Wavelet-domain comparison of two square blocks.
 *
 * The sample differences, scaled by 16, are placed in a 32-wide scratch
 * buffer and transformed with ff_spatial_dwt(); the absolute values of the
 * coefficients of every subband are then weighted with a per-subband factor
 * and summed.
 *
 * @param v          unused
 * @param pix1       first block
 * @param pix2       second block
 * @param line_size  distance in bytes between two rows, for both blocks
 * @param w          block width; 8 selects 3 decomposition levels, anything else 4
 * @param h          block height; must equal w and must not exceed 32
 * @param type       wavelet selector passed to ff_spatial_dwt() and used to pick
 *                   the weight set: 0 for 9/7, 1 for 5/3
 * @return the weighted sum, shifted right by 9
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    /* weights indexed by [type][dec_count-3][level][orientation] */
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    /* checked before tmp[] is written: the scratch buffer only holds 32 rows */
    assert(w==h);

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            /* "* 16" rather than "<< 4": the difference may be negative and
             * left-shifting a negative value is undefined behaviour */
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0]) * 16;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1]) * 16;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2]) * 16;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    for(level=0; level<dec_count; level++){
        /* orientation 0 is only visited at level 0 */
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int coef= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(coef);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
413
/*
 * Fixed-size entry points for w_c(). The digits after 'w' name the wavelet
 * (53 -> type 1, 97 -> type 0) and the suffix is the block width handed to
 * w_c(); h is forwarded unchanged.
 */

/* 5/3 wavelet, width 8 */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}

/* 9/7 wavelet, width 8 */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}

/* 5/3 wavelet, width 16 */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

/* 9/7 wavelet, width 16 */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

/* 5/3 wavelet, width 32; has external linkage */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

/* 9/7 wavelet, width 32; has external linkage */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
3a6fc8fa 437#endif
871371a7 438
5a6a9e78
AJ
439/* draw the edges of width 'w' of an image of size width, height */
440//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int n;

    /* replicate the first and the last image row w times above and below */
    for (n = 1; n <= w; n++) {
        memcpy(buf    - n * wrap, buf,    width);
        memcpy(bottom + n * wrap, bottom, width);
    }

    /* extend every image row by w samples to the left and to the right */
    row = buf;
    for (n = 0; n < height; n++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four w x w corner squares with the nearest image corner sample */
    for (n = 1; n <= w; n++) {
        memset(buf    - n * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - n * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + n * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + n * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
467
288a44fb
AJ
468/**
469 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
470 * @param buf destination buffer
471 * @param src source buffer
472 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
473 * @param block_w width of block
474 * @param block_h height of block
475 * @param src_x x coordinate of the top left sample of the block in the source buffer
476 * @param src_y y coordinate of the top left sample of the block in the source buffer
477 * @param w width of the source buffer
478 * @param h height of the source buffer
479 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h)
{
    int row, col;
    int top, left, bottom, right;

    /* Pull a block lying completely outside the picture back until it
     * overlaps it by exactly one row / one column. */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += (w - 1 - src_x);
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += (1 - block_w - src_x);
        src_x = 1 - block_w;
    }

    /* Part of the block, in block coordinates, that lies inside the picture. */
    top    = FFMAX(0, -src_y);
    left   = FFMAX(0, -src_x);
    bottom = FFMIN(block_h, h - src_y);
    right  = FFMIN(block_w, w - src_x);

    /* copy the samples that really exist */
    for (row = top; row < bottom; row++) {
        const int off = row * linesize;

        for (col = left; col < right; col++)
            buf[off + col] = src[off + col];
    }

    /* repeat the first valid row upwards */
    for (row = 0; row < top; row++) {
        for (col = left; col < right; col++)
            buf[col + row * linesize] = buf[col + top * linesize];
    }

    /* repeat the last valid row downwards */
    for (row = bottom; row < block_h; row++) {
        for (col = left; col < right; col++)
            buf[col + row * linesize] = buf[col + (bottom - 1) * linesize];
    }

    /* finally stretch every row of the block sideways */
    for (row = 0; row < block_h; row++) {
        const int off = row * linesize;

        for (col = 0; col < left; col++)
            buf[off + col] = buf[off + left];

        for (col = right; col < block_w; col++)
            buf[off + col] = buf[off + right - 1];
    }
}
538
0c1a9eda 539static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 540{
de6d9b64
FB
541 int i;
542
543 /* read the pixels */
de6d9b64 544 for(i=0;i<8;i++) {
c13e1abd
FH
545 block[0] = pixels[0];
546 block[1] = pixels[1];
547 block[2] = pixels[2];
548 block[3] = pixels[3];
549 block[4] = pixels[4];
550 block[5] = pixels[5];
551 block[6] = pixels[6];
552 block[7] = pixels[7];
553 pixels += line_size;
554 block += 8;
de6d9b64
FB
555 }
556}
557
0c1a9eda 558static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
bb270c08 559 const uint8_t *s2, int stride){
9dbcbd92
MN
560 int i;
561
562 /* read the pixels */
9dbcbd92 563 for(i=0;i<8;i++) {
c13e1abd
FH
564 block[0] = s1[0] - s2[0];
565 block[1] = s1[1] - s2[1];
566 block[2] = s1[2] - s2[2];
567 block[3] = s1[3] - s2[3];
568 block[4] = s1[4] - s2[4];
569 block[5] = s1[5] - s2[5];
570 block[6] = s1[6] - s2[6];
571 block[7] = s1[7] - s2[7];
9dbcbd92
MN
572 s1 += stride;
573 s2 += stride;
c13e1abd 574 block += 8;
9dbcbd92
MN
575 }
576}
577
578
0c1a9eda 579static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 580 int line_size)
de6d9b64 581{
de6d9b64 582 int i;
55fde95e 583 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 584
de6d9b64 585 /* read the pixels */
de6d9b64 586 for(i=0;i<8;i++) {
c13e1abd
FH
587 pixels[0] = cm[block[0]];
588 pixels[1] = cm[block[1]];
589 pixels[2] = cm[block[2]];
590 pixels[3] = cm[block[3]];
591 pixels[4] = cm[block[4]];
592 pixels[5] = cm[block[5]];
593 pixels[6] = cm[block[6]];
594 pixels[7] = cm[block[7]];
595
596 pixels += line_size;
597 block += 8;
de6d9b64
FB
598 }
599}
600
178fcca8 601static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 602 int line_size)
178fcca8
MN
603{
604 int i;
55fde95e 605 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 606
178fcca8
MN
607 /* read the pixels */
608 for(i=0;i<4;i++) {
609 pixels[0] = cm[block[0]];
610 pixels[1] = cm[block[1]];
611 pixels[2] = cm[block[2]];
612 pixels[3] = cm[block[3]];
613
614 pixels += line_size;
615 block += 8;
616 }
617}
618
9ca358b9 619static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 620 int line_size)
9ca358b9
MN
621{
622 int i;
55fde95e 623 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 624
9ca358b9
MN
625 /* read the pixels */
626 for(i=0;i<2;i++) {
627 pixels[0] = cm[block[0]];
628 pixels[1] = cm[block[1]];
629
630 pixels += line_size;
631 block += 8;
632 }
633}
634
115329f1 635static void put_signed_pixels_clamped_c(const DCTELEM *block,
f9ed9d85
MM
636 uint8_t *restrict pixels,
637 int line_size)
638{
639 int i, j;
640
641 for (i = 0; i < 8; i++) {
642 for (j = 0; j < 8; j++) {
643 if (*block < -128)
644 *pixels = 0;
645 else if (*block > 127)
646 *pixels = 255;
647 else
648 *pixels = (uint8_t)(*block + 128);
649 block++;
650 pixels++;
651 }
652 pixels += (line_size - 8);
653 }
654}
655
342c7dfd
KS
656static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
657 int line_size)
658{
659 int i;
660
661 /* read the pixels */
662 for(i=0;i<8;i++) {
663 pixels[0] = block[0];
664 pixels[1] = block[1];
665 pixels[2] = block[2];
666 pixels[3] = block[3];
667 pixels[4] = block[4];
668 pixels[5] = block[5];
669 pixels[6] = block[6];
670 pixels[7] = block[7];
671
672 pixels += line_size;
673 block += 8;
674 }
675}
676
0c1a9eda 677static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 678 int line_size)
de6d9b64 679{
de6d9b64 680 int i;
55fde95e 681 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 682
de6d9b64 683 /* read the pixels */
de6d9b64 684 for(i=0;i<8;i++) {
c13e1abd
FH
685 pixels[0] = cm[pixels[0] + block[0]];
686 pixels[1] = cm[pixels[1] + block[1]];
687 pixels[2] = cm[pixels[2] + block[2]];
688 pixels[3] = cm[pixels[3] + block[3]];
689 pixels[4] = cm[pixels[4] + block[4]];
690 pixels[5] = cm[pixels[5] + block[5]];
691 pixels[6] = cm[pixels[6] + block[6]];
692 pixels[7] = cm[pixels[7] + block[7]];
693 pixels += line_size;
694 block += 8;
de6d9b64
FB
695 }
696}
178fcca8
MN
697
698static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
699 int line_size)
700{
701 int i;
55fde95e 702 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 703
178fcca8
MN
704 /* read the pixels */
705 for(i=0;i<4;i++) {
706 pixels[0] = cm[pixels[0] + block[0]];
707 pixels[1] = cm[pixels[1] + block[1]];
708 pixels[2] = cm[pixels[2] + block[2]];
709 pixels[3] = cm[pixels[3] + block[3]];
710 pixels += line_size;
711 block += 8;
712 }
713}
9ca358b9
MN
714
715static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
716 int line_size)
717{
718 int i;
55fde95e 719 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 720
9ca358b9
MN
721 /* read the pixels */
722 for(i=0;i<2;i++) {
723 pixels[0] = cm[pixels[0] + block[0]];
724 pixels[1] = cm[pixels[1] + block[1]];
725 pixels += line_size;
726 block += 8;
727 }
728}
36940eca
LM
729
730static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
731{
732 int i;
733 for(i=0;i<8;i++) {
734 pixels[0] += block[0];
735 pixels[1] += block[1];
736 pixels[2] += block[2];
737 pixels[3] += block[3];
738 pixels[4] += block[4];
739 pixels[5] += block[5];
740 pixels[6] += block[6];
741 pixels[7] += block[7];
742 pixels += line_size;
743 block += 8;
744 }
745}
746
747static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
748{
749 int i;
750 for(i=0;i<4;i++) {
751 pixels[0] += block[0];
752 pixels[1] += block[1];
753 pixels[2] += block[2];
754 pixels[3] += block[3];
755 pixels += line_size;
756 block += 4;
757 }
758}
759
1edbfe19
LM
760static int sum_abs_dctelem_c(DCTELEM *block)
761{
762 int sum=0, i;
763 for(i=0; i<64; i++)
764 sum+= FFABS(block[i]);
765 return sum;
766}
767
342c7dfd
KS
/**
 * Fills a block 16 bytes wide and h rows high with a constant value.
 *
 * @param block      top-left byte of the block
 * @param value      byte to store
 * @param line_size  distance in bytes between two rows
 * @param h          number of rows; nothing is written for h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int left;

    for (left = h; left > 0; left--) {
        memset(row, value, 16);
        row += line_size;
    }
}
777
/**
 * Fills a block 8 bytes wide and h rows high with a constant value.
 *
 * @param block      top-left byte of the block
 * @param value      byte to store
 * @param line_size  distance in bytes between two rows
 * @param h          number of rows; nothing is written for h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int left;

    for (left = h; left > 0; left--) {
        memset(row, value, 8);
        row += line_size;
    }
}
787
/**
 * Upscales an 8x8 block to 16x16 by repeating every sample twice
 * horizontally and twice vertically.
 *
 * The output is written byte by byte. The previous version stored
 * src[i] * 0x0101 through uint16_t pointers initialised from dst without a
 * cast, which is an incompatible pointer conversion and a type-punned
 * store; the bytes produced are the same.
 *
 * @param src       64 contiguous source samples, 8 per row
 * @param dst       top-left byte of the 16x16 destination
 * @param linesize  distance in bytes between two destination rows
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;

    for (j = 0; j < 8; j++) {
        /* source row j becomes destination rows 2*j and 2*j + 1 */
        uint8_t *even = dst + (2 * j)     * linesize;
        uint8_t *odd  = dst + (2 * j + 1) * linesize;

        for (i = 0; i < 8; i++) {
            const uint8_t sample = src[8 * j + i];

            even[2 * i] = even[2 * i + 1] = sample;
            odd [2 * i] = odd [2 * i + 1] = sample;
        }
    }
}
803
59fe111e
MN
804#if 0
805
806#define PIXOP2(OPNAME, OP) \
b3184779 807static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
808{\
809 int i;\
810 for(i=0; i<h; i++){\
905694d9 811 OP(*((uint64_t*)block), AV_RN64(pixels));\
59fe111e
MN
812 pixels+=line_size;\
813 block +=line_size;\
814 }\
815}\
816\
45553457 817static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
818{\
819 int i;\
820 for(i=0; i<h; i++){\
905694d9
RS
821 const uint64_t a= AV_RN64(pixels );\
822 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
823 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
824 pixels+=line_size;\
825 block +=line_size;\
826 }\
827}\
828\
45553457 829static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
830{\
831 int i;\
832 for(i=0; i<h; i++){\
905694d9
RS
833 const uint64_t a= AV_RN64(pixels );\
834 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
835 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
836 pixels+=line_size;\
837 block +=line_size;\
838 }\
839}\
840\
45553457 841static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
842{\
843 int i;\
844 for(i=0; i<h; i++){\
905694d9
RS
845 const uint64_t a= AV_RN64(pixels );\
846 const uint64_t b= AV_RN64(pixels+line_size);\
59fe111e
MN
847 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
848 pixels+=line_size;\
849 block +=line_size;\
850 }\
851}\
852\
45553457 853static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
854{\
855 int i;\
856 for(i=0; i<h; i++){\
905694d9
RS
857 const uint64_t a= AV_RN64(pixels );\
858 const uint64_t b= AV_RN64(pixels+line_size);\
59fe111e
MN
859 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
860 pixels+=line_size;\
861 block +=line_size;\
862 }\
863}\
864\
45553457 865static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
866{\
867 int i;\
905694d9
RS
868 const uint64_t a= AV_RN64(pixels );\
869 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
870 uint64_t l0= (a&0x0303030303030303ULL)\
871 + (b&0x0303030303030303ULL)\
872 + 0x0202020202020202ULL;\
873 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
874 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
875 uint64_t l1,h1;\
876\
877 pixels+=line_size;\
878 for(i=0; i<h; i+=2){\
905694d9
RS
879 uint64_t a= AV_RN64(pixels );\
880 uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
881 l1= (a&0x0303030303030303ULL)\
882 + (b&0x0303030303030303ULL);\
883 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
884 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
885 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
886 pixels+=line_size;\
887 block +=line_size;\
905694d9
RS
888 a= AV_RN64(pixels );\
889 b= AV_RN64(pixels+1);\
59fe111e
MN
890 l0= (a&0x0303030303030303ULL)\
891 + (b&0x0303030303030303ULL)\
892 + 0x0202020202020202ULL;\
893 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
894 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
895 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
896 pixels+=line_size;\
897 block +=line_size;\
898 }\
899}\
900\
45553457 901static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
902{\
903 int i;\
905694d9
RS
904 const uint64_t a= AV_RN64(pixels );\
905 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
906 uint64_t l0= (a&0x0303030303030303ULL)\
907 + (b&0x0303030303030303ULL)\
908 + 0x0101010101010101ULL;\
909 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
910 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
911 uint64_t l1,h1;\
912\
913 pixels+=line_size;\
914 for(i=0; i<h; i+=2){\
905694d9
RS
915 uint64_t a= AV_RN64(pixels );\
916 uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
917 l1= (a&0x0303030303030303ULL)\
918 + (b&0x0303030303030303ULL);\
919 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
920 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
921 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
922 pixels+=line_size;\
923 block +=line_size;\
905694d9
RS
924 a= AV_RN64(pixels );\
925 b= AV_RN64(pixels+1);\
59fe111e
MN
926 l0= (a&0x0303030303030303ULL)\
927 + (b&0x0303030303030303ULL)\
928 + 0x0101010101010101ULL;\
929 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
930 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
931 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
932 pixels+=line_size;\
933 block +=line_size;\
934 }\
935}\
936\
45553457
ZK
937CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
938CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
939CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
940CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
941CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
942CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
943CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
944
945#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
946#else // 64 bit variant
947
/**
 * 32-bit variant of the pixel copy / interpolation function generator.
 *
 * PIXOP2(OPNAME, OP) emits a family of functions named OPNAME_* that read
 * source pixels (unaligned, through AV_RN16/AV_RN32) and combine the result
 * into the destination with OP(dst_lvalue, value). The destination is
 * accessed through uint16_t/uint32_t pointers, so it must be suitably
 * aligned for those stores.
 *
 * Every function funnels its store through OP, including _pixels2_xy2_c,
 * which previously wrote to block[] directly and therefore ignored OP
 * (making the "avg" instantiation behave like "put").
 */
#define PIXOP2(OPNAME, OP) \
/* Full-pel: 2 pixels per row, h rows. */\
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* Full-pel: 4 pixels per row, h rows. */\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* Full-pel: 8 pixels per row (two 32-bit words), h rows. */\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* Full-pel copy involves no averaging, so the no_rnd flavour is the same routine. */\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
/* 8-wide merge of two sources combined with no_rnd_avg32(); each source has its own stride. */\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
/* 8-wide merge of two sources combined with rnd_avg32(). */\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
/* 4-wide merge of two sources combined with rnd_avg32(). */\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
/* 2-wide merge: the 16-bit loads are widened to 32 bits for rnd_avg32(), the result is stored as 16 bits. */\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
/* 16-wide merges are built from two 8-wide halves. */\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
/* Half-pel in x: merge each pixel with its right neighbour. */\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
/* Half-pel in y: merge each pixel with the one on the next row. */\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* 8-wide merge of four sources, (a+b+c+d+2)>>2 per byte. Each byte is split\
 * into its low 2 bits and high 6 bits so that the four packed bytes of a\
 * word can be summed without carries crossing byte boundaries. */\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* Same as _pixels8_l4 but with a rounding term of 1 instead of 2 per byte. */\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
/* Half-pel in x and y, 2 pixels wide, computed per byte. Two output rows are\
 * produced per iteration; the horizontal sums of the shared middle row are\
 * reused. The +2 rounding term is folded into the a0/b0 row sums only. */\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        /* store through OP so that the avg variant merges with the destination */\
        OP(block[0], (a1+a0)>>2);\
        OP(block[1], (b1+b0)>>2);\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        OP(block[0], (a1+a0)>>2);\
        OP(block[1], (b1+b0)>>2);\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* Half-pel in x and y, 4 pixels wide, using the packed low/high bit split. */\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint32_t a= AV_RN32(pixels  );\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels  );\
        uint32_t b= AV_RN32(pixels+1);\
        l1=  (a&0x03030303UL)\
           + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels  );\
        b= AV_RN32(pixels+1);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* Half-pel in x and y, 8 pixels wide: two passes (j) over 4-pixel columns.\
 * After each pass the pointers are rewound to the top and moved 4 to the right. */\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                  + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* Same as _pixels8_xy2_c but with a rounding term of 1 instead of 2 per byte. */\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                  + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* 16-wide entry points are generated from the 8-wide ones via CALL_2X_PIXELS. */\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)

d8085ea7 1314#define op_avg(a, b) a = rnd_avg32(a, b)
59fe111e 1315#endif
59fe111e
MN
1316#define op_put(a, b) a = b
1317
1318PIXOP2(avg, op_avg)
1319PIXOP2(put, op_put)
1320#undef op_avg
1321#undef op_put
1322
de6d9b64
FB
/* Rounded integer averages of two / four values. Every argument is
 * parenthesized so that expressions containing operators of lower
 * precedence than '+' (|, &, ?:, ...) are averaged as a whole. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
1325
c0a0170c
MN
/**
 * Single-stride wrapper around put_no_rnd_pixels16_l2(): the same stride is
 * passed for the destination and for both sources a and b.
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, a, b,
                           stride, stride, stride,
                           h);
}
1329
/**
 * Single-stride wrapper around put_no_rnd_pixels8_l2(): the same stride is
 * passed for the destination and for both sources a and b.
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, a, b,
                          stride, stride, stride,
                          h);
}
073b013d 1333
/**
 * Bilinear interpolation of an 8-pixel-wide block with a 1/16-pel offset.
 *
 * @param dst     destination, 8 pixels written per row
 * @param src     source; columns 0..8 of rows 0..h are read
 * @param stride  row stride of both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal fraction in sixteenths
 * @param y16     vertical fraction in sixteenths
 * @param rounder value added before the final >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* weights of the four neighbours: top-left, top-right, bottom-left, bottom-right */
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = (     x16) * (16 - y16);
    const int w_bl = (16 - x16) * (     y16);
    const int w_br = (     x16) * (     y16);
    int row;

    for (row = 0; row < h; row++, dst += stride, src += stride) {
        const uint8_t *below = src + stride;
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + rounder) >> 8;
        }
    }
}
1356
/**
 * Global motion compensation of an 8-pixel-wide block.
 *
 * For every output pixel a source position is tracked in (vx, vy); it is
 * advanced by (dxx, dyx) per column and its row start (ox, oy) by (dxy, dyy)
 * per row. The position is shifted right by 16, its low 'shift' bits are the
 * interpolation fraction and the remaining bits the integer sample position.
 * Positions falling outside the picture are clipped to the border and
 * interpolated only along the axis that is still inside.
 *
 * @param r              rounding term added before the final >>(2*shift)
 * @param width, height  picture dimensions used for the border handling
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s         = 1 << shift;
    const int frac_mask = s - 1;
    int row;

    /* from here on these are the limits used by the range tests and clipping */
    width--;
    height--;

    for (row = 0; row < h; row++, ox += dxy, oy += dyy) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++, vx += dxx, vy += dyx) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & frac_mask;
            const int frac_y = src_y & frac_mask;
            int x_inside, y_inside, index;

            src_x >>= shift;
            src_y >>= shift;

            /* the unsigned compare also rejects negative coordinates */
            x_inside = (unsigned)src_x < width;
            y_inside = (unsigned)src_y < height;

            if (x_inside && y_inside) {
                /* full bilinear interpolation */
                index = src_x + src_y * stride;
                out[col] = ((src[index           ] * (s - frac_x) +
                             src[index + 1       ] *      frac_x) * (s - frac_y) +
                            (src[index + stride  ] * (s - frac_x) +
                             src[index + stride+1] *      frac_x) *      frac_y +
                            r) >> (shift * 2);
            } else if (x_inside) {
                /* row clipped: horizontal interpolation only */
                index = src_x + av_clip(src_y, 0, height) * stride;
                out[col] = ((src[index    ] * (s - frac_x) +
                             src[index + 1] *      frac_x) * s +
                            r) >> (shift * 2);
            } else if (y_inside) {
                /* column clipped: vertical interpolation only */
                index = av_clip(src_x, 0, width) + src_y * stride;
                out[col] = ((src[index         ] * (s - frac_y) +
                             src[index + stride] *      frac_y) * s +
                            r) >> (shift * 2);
            } else {
                /* both clipped: copy the nearest border sample */
                index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height) * stride;
                out[col] = src[index];
            }
        }
    }
}
669ac79c
MN
1414
/**
 * Third-pel MC at the integer position: dispatch to the plain copy routine
 * for the given block width. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
1423
/**
 * Third-pel MC, horizontal weights 2:1 (current : right neighbour).
 * The division by 3 is done as a multiply by 683 followed by >>11.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1434
/**
 * Third-pel MC, horizontal weights 1:2 (current : right neighbour).
 * The division by 3 is done as a multiply by 683 followed by >>11.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
115329f1 1445
669ac79c
MN
/**
 * Third-pel MC, vertical weights 2:1 (current : pixel one row below).
 * The division by 3 is done as a multiply by 683 followed by >>11.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
115329f1 1456
669ac79c
MN
/**
 * Third-pel MC, 2x2 weights 4,3 / 3,2 (top row / bottom row).
 * The division by 12 is done as a multiply by 2731 followed by >>15.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 4*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1467
/**
 * Third-pel MC, 2x2 weights 3,2 / 4,3 (top row / bottom row).
 * The division by 12 is done as a multiply by 2731 followed by >>15.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 2*src[col+1]
                          + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1478
/**
 * Third-pel MC, vertical weights 1:2 (current : pixel one row below).
 * The division by 3 is done as a multiply by 683 followed by >>11.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
1489
/**
 * Third-pel MC, 2x2 weights 3,4 / 2,3 (top row / bottom row).
 * The division by 12 is done as a multiply by 2731 followed by >>15.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 4*src[col+1]
                          + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
1500
/**
 * Third-pel MC, 2x2 weights 2,3 / 3,4 (top row / bottom row).
 * The division by 12 is done as a multiply by 2731 followed by >>15.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
da3b9756
MM
1511
/**
 * Averaging third-pel MC at the integer position: dispatch to the avg_pixels
 * routine for the given block width. Widths other than 2, 4, 8 and 16 do
 * nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1520
/**
 * Averaging third-pel MC, horizontal weights 2:1. The interpolated value
 * (multiply by 683, >>11) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1531
/**
 * Averaging third-pel MC, horizontal weights 1:2. The interpolated value
 * (multiply by 683, >>11) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
115329f1 1542
da3b9756
MM
/**
 * Averaging third-pel MC, vertical weights 2:1. The interpolated value
 * (multiply by 683, >>11) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
115329f1 1553
da3b9756
MM
/**
 * Averaging third-pel MC, 2x2 weights 4,3 / 3,2. The interpolated value
 * (multiply by 2731, >>15) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 4*src[col]        + 3*src[col+1]
                           + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1564
/**
 * Averaging third-pel MC, 2x2 weights 3,2 / 4,3. The interpolated value
 * (multiply by 2731, >>15) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 3*src[col]        + 2*src[col+1]
                           + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1575
/**
 * Averaging third-pel MC, vertical weights 1:2. The interpolated value
 * (multiply by 683, >>11) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1586
/**
 * Averaging third-pel MC, 2x2 weights 3,4 / 2,3. The interpolated value
 * (multiply by 2731, >>15) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 3*src[col]        + 4*src[col+1]
                           + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
1597
/**
 * Averaging third-pel MC, 2x2 weights 2,3 / 3,4. The interpolated value
 * (multiply by 2731, >>15) is averaged with rounding into the existing dst.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 2*src[col]        + 3*src[col+1]
                           + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
669ac79c
MN
#if 0
/* Disabled: generator for fixed-width wrappers around the generic
 * put_tpel_pixels_mcXY_c() routines. Each wrapper forwards to the generic
 * routine with 'width' filled in. The wrapper bodies are plain calls (a
 * leading 'void' would turn them into invalid declarations), so the macro
 * expands to well-formed code if this section is ever enabled. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1629
0da71265
MN
/**
 * Emits one H.264 chroma bilinear MC function of width W
 * (OPNAME h264_chroma_mc<W>_c). x and y are the eighth-pel fractions (0..7).
 * The weighted sum, scaled by 64, is handed to OP(dst_pixel, sum), which is
 * responsible for rounding, scaling and the final store.
 *
 * When D (= x*y) is zero, at most one of B and C is non-zero, so the filter
 * reduces to two taps: the current pixel and either its right neighbour
 * (step 1) or the pixel one row below (step stride).
 */
#define H264_CHROMA_MC_W(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(row=0; row<h; row++, dst+=stride, src+=stride){\
            for(col=0; col<W; col++){\
                OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
            }\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(row=0; row<h; row++, dst+=stride, src+=stride){\
            for(col=0; col<W; col++){\
                OP(dst[col], (A*src[col] + E*src[step+col]));\
            }\
        }\
    }\
}

/* Generates the 2-, 4- and 8-pixel-wide chroma MC functions for one operator. */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_W(OPNAME, OP, 2)\
\
H264_CHROMA_MC_W(OPNAME, OP, 4)\
\
H264_CHROMA_MC_W(OPNAME, OP, 8)

/* b is the weighted sum scaled by 64: round, scale down, then store (put)
 * or average with rounding into the existing destination pixel (avg). */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1738
/**
 * VC-1 chroma bilinear MC, 8 pixels wide, "no rounding" flavour.
 * x and y are eighth-pel fractions (0..7); the bias added before the >>6 is
 * 32 - 4 = 28.
 * Columns 0..8 of rows 0..h of src are read.
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* weights of the top-left, top-right, bottom-left and bottom-right samples */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr = (    x) * (8 - y);
    const int w_bl = (8 - x) * (    y);
    const int w_br = (    x) * (    y);
    int row;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++, dst += stride, src += stride) {
        const uint8_t *below = src + stride;
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + 32 - 4) >> 6;
        }
    }
}
1762
8013da73
DC
/**
 * Averaging VC-1 chroma bilinear MC, 8 pixels wide, "no rounding" flavour.
 * The interpolated value (bias 32 - 4 before the >>6) is merged into the
 * existing destination pixel with avg2().
 * Columns 0..8 of rows 0..h of src are read.
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* weights of the top-left, top-right, bottom-left and bottom-right samples */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr = (    x) * (8 - y);
    const int w_bl = (8 - x) * (    y);
    const int w_br = (    x) * (    y);
    int row;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++, dst += stride, src += stride) {
        const uint8_t *below = src + stride;
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = avg2(dst[col], ((w_tl * src[col]   + w_tr * src[col + 1] +
                                        w_bl * below[col] + w_br * below[col + 1] + 32 - 4) >> 6));
        }
    }
}
1786
/**
 * QPEL_MC(r, OPNAME, RND, OP): generates the C "mpeg4_qpel" lowpass filters
 * and the qpel8 / qpel16 mcXY motion-compensation entry points for one
 * store flavour.
 *
 * Macro parameters:
 *   r       not referenced anywhere in the macro body.
 *   OPNAME  prefix pasted onto every generated function name and onto the
 *           pixelsN_c / pixelsN_l2 / pixelsN_l4 helpers that perform the
 *           final write to dst (those helpers are defined outside this macro).
 *   RND     pasted between "put" and the helper name
 *           (put ## RND ## mpeg4_qpel8_h_lowpass, put ## RND ## pixels8_l2, ...)
 *           to select the helpers that build the intermediate planes.
 *   OP      statement macro OP(lvalue, sum) used by the lowpass filters to
 *           store one filtered sample. The op_* macros defined right after
 *           this macro index a table called cm, which is why every lowpass
 *           function declares a local cm.
 *
 * Lowpass filters: each output is the unnormalised sum
 *   (a+b)*20 - (c+d)*6 + (e+f)*3 - (g+h)
 * i.e. a symmetric 8-tap kernel (-1, 3, -6, 20, 20, -6, 3, -1). Near the block
 * edges the taps that would fall outside the input are folded back onto
 * samples inside it (for instance dst[0] uses src[0] where src[-1] would be),
 * so an N-wide filter reads exactly N+1 input samples. Normalisation is left
 * to OP.
 *
 * mcXY entry points: judging by how the planes are combined (mc10 averages
 * src with the horizontally filtered plane, mc20 is the horizontally filtered
 * plane alone, mc30 averages src+1 with it, and likewise vertically for
 * mc01/mc02/mc03), X and Y look like the horizontal and vertical
 * quarter-sample offsets -- TODO confirm against the callers.
 *
 * Scratch planes used by the 8x8 functions:
 *   full[16*9]  filled by copy_block9(full, src, 16, stride, 9); presumably
 *               9 rows of 9 pixels stored with a stride of 16 (helper is not
 *               visible here).
 *   halfH[72]   9 rows of 8 horizontally filtered samples, stride 8.
 *   halfV[64], halfHV[64], half[64]  8 rows of 8 samples, stride 8.
 * The 16x16 functions use full[24*17] (stride 24), halfH[272] (17 rows of 16)
 * and 256-byte planes in the same roles.
 *
 * The non-static ff_..._old_c variants blend four planes in one pixelsN_l4
 * call (or halfV with halfHV for mc12/mc32); the static replacements first
 * fold the full-pel plane into halfH with put ## RND ## pixelsN_l2 and then
 * finish with a single l2 blend or a vertical lowpass.
 */
b3184779 1787#define QPEL_MC(r, OPNAME, RND, OP) \
/* 8-wide horizontal lowpass: h rows, 9 input samples (src[0..8]) per row. */\
0c1a9eda 1788static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 1789 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
1790 int i;\
1791 for(i=0; i<h; i++)\
1792 {\
1793 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1794 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1795 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1796 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1797 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1798 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1799 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1800 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1801 dst+=dstStride;\
1802 src+=srcStride;\
1803 }\
44eb4951
MN
1804}\
1805\
/* 8-wide vertical lowpass: 8 columns, 9 input rows per column. */\
0c1a9eda 1806static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 1807 const int w=8;\
55fde95e 1808 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
1809 int i;\
1810 for(i=0; i<w; i++)\
1811 {\
1812 const int src0= src[0*srcStride];\
1813 const int src1= src[1*srcStride];\
1814 const int src2= src[2*srcStride];\
1815 const int src3= src[3*srcStride];\
1816 const int src4= src[4*srcStride];\
1817 const int src5= src[5*srcStride];\
1818 const int src6= src[6*srcStride];\
1819 const int src7= src[7*srcStride];\
1820 const int src8= src[8*srcStride];\
1821 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1822 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1823 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1824 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1825 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1826 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1827 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1828 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1829 dst++;\
1830 src++;\
1831 }\
1832}\
1833\
/* 16-wide horizontal lowpass: h rows, 17 input samples (src[0..16]) per row. */\
0c1a9eda 1834static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 1835 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 1836 int i;\
826f429a 1837 \
b3184779
MN
1838 for(i=0; i<h; i++)\
1839 {\
1840 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1841 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1842 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1843 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1844 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1845 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1846 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1847 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1848 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1849 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1850 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1851 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1852 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1853 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1854 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1855 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1856 dst+=dstStride;\
1857 src+=srcStride;\
1858 }\
1859}\
1860\
/* 16-wide vertical lowpass: 16 columns, 17 input rows per column. */\
0c1a9eda 1861static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
55fde95e 1862 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 1863 int i;\
826f429a 1864 const int w=16;\
b3184779
MN
1865 for(i=0; i<w; i++)\
1866 {\
1867 const int src0= src[0*srcStride];\
1868 const int src1= src[1*srcStride];\
1869 const int src2= src[2*srcStride];\
1870 const int src3= src[3*srcStride];\
1871 const int src4= src[4*srcStride];\
1872 const int src5= src[5*srcStride];\
1873 const int src6= src[6*srcStride];\
1874 const int src7= src[7*srcStride];\
1875 const int src8= src[8*srcStride];\
1876 const int src9= src[9*srcStride];\
1877 const int src10= src[10*srcStride];\
1878 const int src11= src[11*srcStride];\
1879 const int src12= src[12*srcStride];\
1880 const int src13= src[13*srcStride];\
1881 const int src14= src[14*srcStride];\
1882 const int src15= src[15*srcStride];\
1883 const int src16= src[16*srcStride];\
1884 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1885 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1886 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1887 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1888 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1889 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1890 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1891 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1892 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1893 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1894 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1895 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1896 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1897 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1898 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1899 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1900 dst++;\
1901 src++;\
1902 }\
1903}\
1904\
/* 8x8 entry points. */\
0c1a9eda 1905static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1906 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
1907}\
1908\
0c1a9eda
ZK
1909static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1910 uint8_t half[64];\
b3184779
MN
1911 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1912 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
1913}\
1914\
0c1a9eda 1915static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1916 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
1917}\
1918\
0c1a9eda
ZK
1919static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1920 uint8_t half[64];\
b3184779
MN
1921 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1922 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
1923}\
1924\
0c1a9eda
ZK
1925static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1926 uint8_t full[16*9];\
1927 uint8_t half[64];\
b3184779 1928 copy_block9(full, src, 16, stride, 9);\
db794953 1929 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1930 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
1931}\
1932\
0c1a9eda
ZK
1933static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1934 uint8_t full[16*9];\
b3184779 1935 copy_block9(full, src, 16, stride, 9);\
db794953 1936 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
1937}\
1938\
0c1a9eda
ZK
1939static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1940 uint8_t full[16*9];\
1941 uint8_t half[64];\
b3184779 1942 copy_block9(full, src, 16, stride, 9);\
db794953 1943 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1944 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 1945}\
0c1a9eda
ZK
1946void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1947 uint8_t full[16*9];\
1948 uint8_t halfH[72];\
1949 uint8_t halfV[64];\
1950 uint8_t halfHV[64];\
b3184779
MN
1951 copy_block9(full, src, 16, stride, 9);\
1952 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1953 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1954 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1955 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1956}\
0c1a9eda
ZK
1957static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1958 uint8_t full[16*9];\
1959 uint8_t halfH[72];\
1960 uint8_t halfHV[64];\
db794953
MN
1961 copy_block9(full, src, 16, stride, 9);\
1962 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1963 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1964 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1965 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1966}\
0c1a9eda
ZK
1967void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1968 uint8_t full[16*9];\
1969 uint8_t halfH[72];\
1970 uint8_t halfV[64];\
1971 uint8_t halfHV[64];\
b3184779
MN
1972 copy_block9(full, src, 16, stride, 9);\
1973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1974 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1975 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1976 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1977}\
0c1a9eda
ZK
1978static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1979 uint8_t full[16*9];\
1980 uint8_t halfH[72];\
1981 uint8_t halfHV[64];\
db794953
MN
1982 copy_block9(full, src, 16, stride, 9);\
1983 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1984 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1985 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1986 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1987}\
0c1a9eda
ZK
1988void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1989 uint8_t full[16*9];\
1990 uint8_t halfH[72];\
1991 uint8_t halfV[64];\
1992 uint8_t halfHV[64];\
b3184779
MN
1993 copy_block9(full, src, 16, stride, 9);\
1994 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1995 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1996 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1997 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1998}\
0c1a9eda
ZK
1999static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2000 uint8_t full[16*9];\
2001 uint8_t halfH[72];\
2002 uint8_t halfHV[64];\
db794953
MN
2003 copy_block9(full, src, 16, stride, 9);\
2004 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2005 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
2006 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2007 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
2008}\
0c1a9eda
ZK
2009void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2010 uint8_t full[16*9];\
2011 uint8_t halfH[72];\
2012 uint8_t halfV[64];\
2013 uint8_t halfHV[64];\
b3184779
MN
2014 copy_block9(full, src, 16, stride, 9);\
2015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
2016 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2018 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 2019}\
0c1a9eda
ZK
2020static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2021 uint8_t full[16*9];\
2022 uint8_t halfH[72];\
2023 uint8_t halfHV[64];\
db794953
MN
2024 copy_block9(full, src, 16, stride, 9);\
2025 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2026 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2027 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2028 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
2029}\
0c1a9eda
ZK
2030static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2031 uint8_t halfH[72];\
2032 uint8_t halfHV[64];\
b3184779 2033 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 2034 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2035 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 2036}\
0c1a9eda
ZK
2037static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2038 uint8_t halfH[72];\
2039 uint8_t halfHV[64];\
b3184779 2040 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 2041 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2042 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 2043}\
0c1a9eda
ZK
2044void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2045 uint8_t full[16*9];\
2046 uint8_t halfH[72];\
2047 uint8_t halfV[64];\
2048 uint8_t halfHV[64];\
b3184779
MN
2049 copy_block9(full, src, 16, stride, 9);\
2050 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
2051 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
2052 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2053 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 2054}\
0c1a9eda
ZK
2055static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2056 uint8_t full[16*9];\
2057 uint8_t halfH[72];\
db794953
MN
2058 copy_block9(full, src, 16, stride, 9);\
2059 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2060 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
2061 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2062}\
0c1a9eda
ZK
2063void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2064 uint8_t full[16*9];\
2065 uint8_t halfH[72];\
2066 uint8_t halfV[64];\
2067 uint8_t halfHV[64];\
b3184779
MN
2068 copy_block9(full, src, 16, stride, 9);\
2069 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
2070 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2071 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2072 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 2073}\
0c1a9eda
ZK
2074static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2075 uint8_t full[16*9];\
2076 uint8_t halfH[72];\
db794953
MN
2077 copy_block9(full, src, 16, stride, 9);\
2078 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2079 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2080 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2081}\
0c1a9eda
ZK
2082static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2083 uint8_t halfH[72];\
b3184779 2084 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 2085 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 2086}\
/* 16x16 entry points: same structure as the 8x8 ones, with a 24-byte-stride full plane and 16-byte-stride half planes. */\
0c1a9eda 2087static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 2088 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
2089}\
2090\
0c1a9eda
ZK
2091static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2092 uint8_t half[256];\
b3184779
MN
2093 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2094 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
2095}\
2096\
0c1a9eda 2097static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 2098 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 2099}\
b3184779 2100\
0c1a9eda
ZK
2101static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2102 uint8_t half[256];\
b3184779
MN
2103 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2104 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
2105}\
2106\
0c1a9eda
ZK
2107static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2108 uint8_t full[24*17];\
2109 uint8_t half[256];\
b3184779 2110 copy_block17(full, src, 24, stride, 17);\
826f429a 2111 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
2112 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
2113}\
2114\
0c1a9eda
ZK
2115static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2116 uint8_t full[24*17];\
b3184779 2117 copy_block17(full, src, 24, stride, 17);\
826f429a 2118 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
2119}\
2120\
0c1a9eda
ZK
2121static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2122 uint8_t full[24*17];\
2123 uint8_t half[256];\
b3184779 2124 copy_block17(full, src, 24, stride, 17);\
826f429a 2125 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
2126 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
2127}\
0c1a9eda
ZK
2128void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
2129 uint8_t full[24*17];\
2130 uint8_t halfH[272];\
2131 uint8_t halfV[256];\
2132 uint8_t halfHV[256];\
b3184779
MN
2133 copy_block17(full, src, 24, stride, 17);\
2134 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2135 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2136 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2137 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2138}\
0c1a9eda
ZK
2139static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2140 uint8_t full[24*17];\
2141 uint8_t halfH[272];\
2142 uint8_t halfHV[256];\
db794953
MN
2143 copy_block17(full, src, 24, stride, 17);\
2144 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2145 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2146 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2147 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2148}\
0c1a9eda
ZK
2149void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
2150 uint8_t full[24*17];\
2151 uint8_t halfH[272];\
2152 uint8_t halfV[256];\
2153 uint8_t halfHV[256];\
b3184779
MN
2154 copy_block17(full, src, 24, stride, 17);\
2155 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2156 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2157 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2158 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2159}\
0c1a9eda
ZK
2160static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2161 uint8_t full[24*17];\
2162 uint8_t halfH[272];\
2163 uint8_t halfHV[256];\
db794953
MN
2164 copy_block17(full, src, 24, stride, 17);\
2165 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2166 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2167 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2168 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2169}\
0c1a9eda
ZK
2170void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
2171 uint8_t full[24*17];\
2172 uint8_t halfH[272];\
2173 uint8_t halfV[256];\
2174 uint8_t halfHV[256];\
b3184779
MN
2175 copy_block17(full, src, 24, stride, 17);\
2176 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2177 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2178 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2179 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2180}\
0c1a9eda
ZK
2181static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2182 uint8_t full[24*17];\
2183 uint8_t halfH[272];\
2184 uint8_t halfHV[256];\
db794953
MN
2185 copy_block17(full, src, 24, stride, 17);\
2186 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2187 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2188 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2189 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2190}\
0c1a9eda
ZK
2191void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2192 uint8_t full[24*17];\
2193 uint8_t halfH[272];\
2194 uint8_t halfV[256];\
2195 uint8_t halfHV[256];\
b3184779
MN
2196 copy_block17(full, src, 24, stride, 17);\
2197 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
2198 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2199 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2200 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2201}\
0c1a9eda
ZK
2202static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2203 uint8_t full[24*17];\
2204 uint8_t halfH[272];\
2205 uint8_t halfHV[256];\
db794953
MN
2206 copy_block17(full, src, 24, stride, 17);\
2207 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2208 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2209 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2210 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2211}\
0c1a9eda
ZK
2212static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2213 uint8_t halfH[272];\
2214 uint8_t halfHV[256];\
b3184779 2215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 2216 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2217 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2218}\
0c1a9eda
ZK
2219static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2220 uint8_t halfH[272];\
2221 uint8_t halfHV[256];\
b3184779 2222 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 2223 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2224 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2225}\
0c1a9eda
ZK
2226void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2227 uint8_t full[24*17];\
2228 uint8_t halfH[272];\
2229 uint8_t halfV[256];\
2230 uint8_t halfHV[256];\
b3184779
MN
2231 copy_block17(full, src, 24, stride, 17);\
2232 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2233 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2234 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2235 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2236}\
0c1a9eda
ZK
2237static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2238 uint8_t full[24*17];\
2239 uint8_t halfH[272];\
db794953
MN
2240 copy_block17(full, src, 24, stride, 17);\
2241 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2242 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2243 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2244}\
0c1a9eda
ZK
2245void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2246 uint8_t full[24*17];\
2247 uint8_t halfH[272];\
2248 uint8_t halfV[256];\
2249 uint8_t halfHV[256];\
b3184779
MN
2250 copy_block17(full, src, 24, stride, 17);\
2251 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2252 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2253 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2254 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2255}\
0c1a9eda
ZK
2256static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2257 uint8_t full[24*17];\
2258 uint8_t halfH[272];\
db794953
MN
2259 copy_block17(full, src, 24, stride, 17);\
2260 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2261 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2262 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2263}\
0c1a9eda
ZK
2264static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2265 uint8_t halfH[272];\
b3184779 2266 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 2267 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 2268}
44eb4951 2269
b3184779
MN
/*
 * Store operators handed to QPEL_MC as its OP argument.
 *
 * 'b' is the raw lowpass sum; the filter taps used by QPEL_MC add up to 32,
 * so every operator shifts right by 5 to normalise. The shifted value is used
 * as an index into cm, which is not defined here: it must be a local of the
 * function the operator is expanded in (the lowpass functions set it to
 * ff_cropTbl + MAX_NEG_CROP; presumably a clamp-to-byte lookup -- confirm
 * against the table's definition).
 *
 *   op_put          bias 16 before the shift, result stored in a
 *   op_put_no_rnd   bias 15 before the shift, result stored in a
 *   op_avg          like op_put, then averaged with the old a as (a+v+1)>>1
 *   op_avg_no_rnd   bias 15, then averaged with the old a as (a+v)>>1
 */
2270#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2271#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2272#define op_put(a, b) a = cm[((b) + 16)>>5]
2273#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2274
/* Instantiate the put_, put_no_rnd_ and avg_ families. The avg_ family passes
 * "_" as RND, so its intermediate planes are built with the put_ helpers. */
2275QPEL_MC(0, put_ , _ , op_put)
2276QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2277QPEL_MC(0, avg_ , _ , op_avg)
/* The avg_no_rnd instantiation below is disabled, which leaves op_avg_no_rnd
 * without a user among the instantiations above. */
2278//QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* The operators are only meaningful inside QPEL_MC expansions; drop them. */
2279#undef op_avg
2280#undef op_avg_no_rnd
2281#undef op_put
2282#undef op_put_no_rnd
44eb4951 2283
0da71265
MN
2284#if 1
2285#define H264_LOWPASS(OPNAME, OP, OP2) \
bb5705b9 2286static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
80e44bc3 2287 const int h=2;\
55fde95e 2288 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2289 int i;\
2290 for(i=0; i<h; i++)\
2291 {\
2292 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2293 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2294 dst+=dstStride;\
2295 src+=srcStride;\
2296 }\
2297}\
2298\
bb5705b9 2299static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
80e44bc3 2300 const int w=2;\
55fde95e 2301 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2302 int i;\
2303 for(i=0; i<w; i++)\
2304 {\
2305 const int srcB= src[-2*srcStride];\
2306 const int srcA= src[-1*srcStride];\
2307 const int src0= src[0 *srcStride];\
2308 const int src1= src[1 *srcStride];\
2309 const int src2= src[2 *srcStride];\
2310 const int src3= src[3 *srcStride];\
2311 const int src4= src[4 *srcStride];\
2312 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2313 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2314 dst++;\
2315 src++;\
2316 }\
2317}\
2318\
bb5705b9 2319static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
80e44bc3
MN
2320 const int h=2;\
2321 const int w=2;\
55fde95e 2322 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2323 int i;\
2324 src -= 2*srcStride;\
2325 for(i=0; i<h+5; i++)\
2326 {\
2327 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2328 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2329 tmp+=tmpStride;\
2330 src+=srcStride;\
2331 }\
2332 tmp -= tmpStride*(h+5-2);\
2333 for(i=0; i<w; i++)\
2334 {\
2335 const int tmpB= tmp[-2*tmpStride];\
2336 const int tmpA= tmp[-1*tmpStride];\
2337 const int tmp0= tmp[0 *tmpStride];\
2338 const int tmp1= tmp[1 *tmpStride];\
2339 const int tmp2= tmp[2 *tmpStride];\
2340 const int tmp3= tmp[3 *tmpStride];\
2341 const int tmp4= tmp[4 *tmpStride];\
2342 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2343 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2344 dst++;\
2345 tmp++;\
2346 }\
2347}\
0da71265
MN
2348static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2349 const int h=4;\
55fde95e 2350 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2351 int i;\
2352 for(i=0; i<h; i++)\
2353 {\
2354 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2355 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2356 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
2357 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
2358 dst+=dstStride;\
2359 src+=srcStride;\
2360 }\
2361}\
2362\
2363static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2364 const int w=4;\
55fde95e 2365 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2366 int i;\
2367 for(i=0; i<w; i++)\
2368 {\
2369 const int srcB= src[-2*srcStride];\
2370 const int srcA= src[-1*srcStride];\
2371 const int src0= src[0 *srcStride];\
2372 const int src1= src[1 *srcStride];\
2373 const int src2= src[2 *srcStride];\
2374 const int src3= src[3 *srcStride];\
2375 const int src4= src[4 *srcStride];\
2376 const int src5= src[5 *srcStride];\
2377 const int src6= src[6 *srcStride];\
2378 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2379 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2380 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2381 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2382 dst++;\
2383 src++;\
2384 }\
2385}\
2386\
2387static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2388 const int h=4;\
2389 const int w=4;\
55fde95e 2390 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2391 int i;\
2392 src -= 2*srcStride;\
2393 for(i=0; i<h+5; i++)\
2394 {\
2395 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2396 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2397 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
2398 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
2399 tmp+=tmpStride;\
2400 src+=srcStride;\
2401 }\
2402 tmp -= tmpStride*(h+5-2);\
2403 for(i=0; i<w; i++)\
2404 {\
2405 const int tmpB= tmp[-2*tmpStride];\
2406 const int tmpA= tmp[-1*tmpStride];\
2407 const int tmp0= tmp[0 *tmpStride];\
2408 const int tmp1= tmp[1 *tmpStride];\
2409 const int tmp2= tmp[2 *tmpStride];\
2410 const int tmp3= tmp[3 *tmpStride];\
2411 const int tmp4= tmp[4 *tmpStride];\
2412 const int tmp5= tmp[5 *tmpStride];\
2413 const int tmp6= tmp[6 *tmpStride];\
2414 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2415 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2416 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2417 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2418 dst++;\
2419 tmp++;\
2420 }\
2421}\
2422\
2423static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2424 const int h=8;\
55fde95e 2425 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2426 int i;\
2427 for(i=0; i<h; i++)\
2428 {\
2429 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
2430 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
2431 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
2432 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
2433 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
2434 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
2435 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
2436 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
2437 dst+=dstStride;\
2438 src+=srcStride;\
2439 }\
2440}\
2441\
2442static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2443 const int w=8;\
55fde95e 2444 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2445 int i;\
2446 for(i=0; i<w; i++)\
2447 {\
2448 const int srcB= src[-2*srcStride];\
2449 const int srcA= src[-1*srcStride];\
2450 const int src0= src[0 *srcStride];\
2451 const int src1= src[1 *srcStride];\
2452 const int src2= src[2 *srcStride];\
2453 const int src3= src[3 *srcStride];\
2454 const int src4= src[4 *srcStride];\
2455 const int src5= src[5 *srcStride];\
2456 const int src6= src[6 *srcStride];\
2457 const int src7= src[7 *srcStride];\
2458 const int src8= src[8 *srcStride];\
2459 const int src9= src[9 *srcStride];\
2460 const int src10=src[10*srcStride];\
2461 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2462 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2463 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2464 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2465 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
2466 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
2467 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
2468 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
2469 dst++;\
2470 src++;\
2471 }\
2472}\
2473\
2474static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2475 const int h=8;\
2476 const int w=8;\
55fde95e 2477 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2478 int i;\
2479 src -= 2*srcStride;\
2480 for(i=0; i<h+5; i++)\
2481 {\
2482 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
2483 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
2484 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
2485 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
2486 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
2487 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
2488 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
2489 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
2490 tmp+=tmpStride;\
2491 src+=srcStride;\
2492 }\
2493 tmp -= tmpStride*(h+5-2);\
2494 for(i=0; i<w; i++)\
2495 {\
2496 const int tmpB= tmp[-2*tmpStride];\
2497 const int tmpA= tmp[-1*tmpStride];\
2498 const int tmp0= tmp[0 *tmpStride];\
2499 const int tmp1= tmp[1 *tmpStride];\
2500 const int tmp2= tmp[2 *tmpStride];\
2501 const int tmp3= tmp[3 *tmpStride];\
2502 const int tmp4= tmp[4 *tmpStride];\
2503 const int tmp5= tmp[5 *tmpStride];\
2504 const int tmp6= tmp[6 *tmpStride];\
2505 const int tmp7= tmp[7 *tmpStride];\
2506 const int tmp8= tmp[8 *tmpStride];\
2507 const int tmp9= tmp[9 *tmpStride];\
2508 const int tmp10=tmp[10*tmpStride];\
2509 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2510 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2511 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2512 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2513 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
2514 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
2515 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
2516 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
2517 dst++;\
2518 tmp++;\
2519 }\
2520}\
2521\
2522static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2523 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2524 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2525 src += 8*srcStride;\
2526 dst += 8*dstStride;\
2527 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2528 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2529}\
2530\
2531static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2532 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2533 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2534 src += 8*srcStride;\
2535 dst += 8*dstStride;\
2536 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2537 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2538}\
2539\
2540static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2541 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2542 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2543 src += 8*srcStride;\
0da71265
MN
2544 dst += 8*dstStride;\
2545 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2546 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2547}\
2548
/**
 * Generate the sixteen OPNAME##h264_qpel##SIZE##_mcXY_c entry points for one
 * block size.
 *
 * Judging from the filters each variant invokes, X selects the horizontal and
 * Y the vertical sub-sample position: mc20 is the plain horizontal low-pass,
 * mc02 the plain vertical one, mc22 the combined hv low-pass, and the
 * remaining positions are formed by blending two intermediate planes with
 * OPNAME##pixels##SIZE##_l2.
 *
 * Intermediate planes are always produced with the put_ filters; OPNAME is
 * only applied on the final store into dst.
 *
 * The vertical filter works on a private copy of SIZE+5 source rows starting
 * two rows above the block ("window"); window_mid points at the row that
 * corresponds to src.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t filtered[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(filtered, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, filtered, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t filtered[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(filtered, src, SIZE, stride);\
    /* blend with the sample to the right of src */\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, filtered, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    uint8_t filtered[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(filtered, window_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, window_mid, filtered, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, window_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    uint8_t filtered[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(filtered, window_mid, SIZE, SIZE);\
    /* blend with the row below src */\
    OPNAME ## pixels ## SIZE ## _l2(dst, window_mid+SIZE, filtered, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, window_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    /* vertical filter runs one column to the right */\
    copy_block ## SIZE (window, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, window_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2, SIZE, stride, SIZE + 5);\
    /* horizontal filter runs one row below */\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, window_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, window_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, scratch, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, diag, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, diag, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, window_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vert, diag, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t window[SIZE*(SIZE+5)];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    uint8_t * const window_mid= window + SIZE*2;\
    copy_block ## SIZE (window, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, window_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, scratch, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vert, diag, stride, SIZE, SIZE, SIZE);\
}

2686#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2687//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2688#define op_put(a, b) a = cm[((b) + 16)>>5]
2689#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2690#define op2_put(a, b) a = cm[((b) + 512)>>10]
2691
2692H264_LOWPASS(put_ , op_put, op2_put)
2693H264_LOWPASS(avg_ , op_avg, op2_avg)
80e44bc3 2694H264_MC(put_, 2)
0da71265
MN
2695H264_MC(put_, 4)
2696H264_MC(put_, 8)
2697H264_MC(put_, 16)
2698H264_MC(avg_, 4)
2699H264_MC(avg_, 8)
2700H264_MC(avg_, 16)
2701
2702#undef op_avg
2703#undef op_put
2704#undef op2_avg
2705#undef op2_put
2706#endif
2707
f66e4f5f
RD
/* Per-sample helpers for H264_WEIGHT; they use the enclosing function's
 * parameters (block/dst/src, weight(s), offset, log2_denom) by name. */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/**
 * Generate the explicit weighted-prediction functions for a W x H block:
 *
 *  weight_h264_pixels<W>x<H>_c    scales block[] in place:
 *      block[x] = clip_uint8((block[x]*weight + offset') >> log2_denom)
 *    where offset' is offset scaled by 2^log2_denom plus a rounding term of
 *    2^(log2_denom-1) when log2_denom is non-zero.
 *
 *  biweight_h264_pixels<W>x<H>_c  blends src into dst:
 *      dst[x] = clip_uint8((src[x]*weights + dst[x]*weightd + offset')
 *                          >> (log2_denom+1))
 *    where offset' is ((offset + 1) | 1) scaled by 2^log2_denom.
 *
 * @param stride     distance in bytes between consecutive rows
 * @param log2_denom base-2 logarithm of the weight denominator
 * @param offset     additive offset; may be negative
 *
 * The offset is scaled by multiplying with (1 << log2_denom) rather than by
 * shifting it, because left-shifting a negative signed value is undefined
 * behaviour in C.
 */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int x, y; \
    offset *= 1 << log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        for(x=0; x<W; x++) \
            op_scale1(x); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int x, y; \
    offset = ((offset + 1) | 1) * (1 << log2_denom); \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        for(x=0; x<W; x++) \
            op_scale2(x); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
2777
1457ab52 2778static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
55fde95e 2779 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
2780 int i;
2781
2782 for(i=0; i<h; i++){
2783 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2784 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2785 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2786 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2787 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2788 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2789 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2790 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2791 dst+=dstStride;
115329f1 2792 src+=srcStride;
1457ab52
MN
2793 }
2794}
2795
#if CONFIG_CAVS_DECODER
/* AVS specific */

/** CAVS 8x8 mc00 case, put variant: delegates to put_pixels8_c for 8 rows. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}

/** CAVS 8x8 mc00 case, avg variant: delegates to avg_pixels8_c for 8 rows. */
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_c(dst, src, stride, 8);
}

/** CAVS 16x16 mc00 case, put variant: delegates to put_pixels16_c for 16 rows. */
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_c(dst, src, stride, 16);
}

/** CAVS 16x16 mc00 case, avg variant: delegates to avg_pixels16_c for 16 rows. */
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
b482e2d1 2811
#if CONFIG_VC1_DECODER
/* VC-1 specific */

/**
 * VC-1 mc00 case, put variant: delegates to put_pixels8_c for 8 rows.
 * The rnd argument is accepted but not used here.
 */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    put_pixels8_c(dst, src, stride, 8);
}

/**
 * VC-1 mc00 case, avg variant: delegates to avg_pixels8_c for 8 rows.
 * The rnd argument is accepted but not used here.
 */
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    avg_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER */
64db55ae 2821
c6b237da 2822/* H264 specific */
edecaff8 2823void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
c6b237da 2824
#if CONFIG_RV40_DECODER
/* RV40 mc33 position: each variant simply delegates to the matching
 * *_pixels*_xy2_c routine for a square block. */

static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_c(dst, src, stride, 16);
}

static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_c(dst, src, stride, 16);
}

static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_c(dst, src, stride, 8);
}

static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
2839
1457ab52 2840static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
55fde95e 2841 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
2842 int i;
2843
2844 for(i=0; i<w; i++){
2845 const int src_1= src[ -srcStride];
2846 const int src0 = src[0 ];
2847 const int src1 = src[ srcStride];
2848 const int src2 = src[2*srcStride];
2849 const int src3 = src[3*srcStride];
2850 const int src4 = src[4*srcStride];
2851 const int src5 = src[5*srcStride];
2852 const int src6 = src[6*srcStride];
2853 const int src7 = src[7*srcStride];
2854 const int src8 = src[8*srcStride];
2855 const int src9 = src[9*srcStride];
2856 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2857 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2858 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2859 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2860 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2861 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2862 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2863 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2864 src++;
2865 dst++;
2866 }
2867}
2868
/** mspel 8x8, position 00: delegates to put_pixels8_c for 8 rows. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
2872
/**
 * mspel 8x8, position 10: combine src with its horizontally low-pass
 * filtered version through put_pixels8_l2.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
2878
/** mspel 8x8, position 20: horizontal low-pass written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2882
/**
 * mspel 8x8, position 30: combine the samples one column to the right of
 * src with the horizontally low-pass filtered block through put_pixels8_l2.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, filtered, stride, stride, 8, 8);
}
2888
/** mspel 8x8, position 02: vertical low-pass written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2892
/**
 * mspel 8x8, position 12: combine the vertically filtered block with the
 * horizontally-then-vertically filtered block through put_pixels8_l2.
 *
 * The horizontal pass covers 11 rows starting one row above src so that the
 * following vertical pass has the rows it needs; rows+8 skips that extra
 * leading row.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows[11*8];
    uint8_t vert[8*8];
    uint8_t both[8*8];

    wmv2_mspel8_v_lowpass(vert, src, 8, stride, 8);
    wmv2_mspel8_h_lowpass(rows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(both, rows+8, 8, 8, 8);
    put_pixels8_l2(dst, vert, both, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position 32: like position 12, except that the plain vertical
 * filter is taken one column to the right of src.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows[11*8];
    uint8_t vert[8*8];
    uint8_t both[8*8];

    wmv2_mspel8_v_lowpass(vert, src+1, 8, stride, 8);
    wmv2_mspel8_h_lowpass(rows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(both, rows+8, 8, 8, 8);
    put_pixels8_l2(dst, vert, both, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position 22: horizontal low-pass over 11 rows (starting one row
 * above src) followed by the vertical low-pass, stored directly in dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows[11*8];

    wmv2_mspel8_h_lowpass(rows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, rows+8, stride, 8, 8);
}
2916
332f9ac4 2917static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 2918 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
2919 int x;
2920 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 2921
332f9ac4
MN
2922 for(x=0; x<8; x++){
2923 int d1, d2, ad1;
2924 int p0= src[x-2*stride];
2925 int p1= src[x-1*stride];
2926 int p2= src[x+0*stride];
2927 int p3= src[x+1*stride];
2928 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2929
2930 if (d<-2*strength) d1= 0;
2931 else if(d<- strength) d1=-2*strength - d;
2932 else if(d< strength) d1= d;
2933 else if(d< 2*strength) d1= 2*strength - d;
2934 else d1= 0;
115329f1 2935
332f9ac4
MN
2936 p1 += d1;
2937 p2 -= d1;
2938 if(p1&256) p1= ~(p1>>31);
2939 if(p2&256) p2= ~(p2>>31);
115329f1 2940
332f9ac4
MN
2941 src[x-1*stride] = p1;
2942 src[x+0*stride] = p2;
2943
c26abfa5 2944 ad1= FFABS(d1)>>1;
115329f1 2945
f66e4f5f 2946 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 2947
332f9ac4
MN
2948 src[x-2*stride] = p0 - d2;
2949 src[x+ stride] = p3 + d2;
2950 }
73f51a4d 2951 }
332f9ac4
MN
2952}
2953
2954static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 2955 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
2956 int y;
2957 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 2958
332f9ac4
MN
2959 for(y=0; y<8; y++){
2960 int d1, d2, ad1;
2961 int p0= src[y*stride-2];
2962 int p1= src[y*stride-1];
2963 int p2= src[y*stride+0];
2964 int p3= src[y*stride+1];
2965 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2966
2967 if (d<-2*strength) d1= 0;
2968 else if(d<- strength) d1=-2*strength - d;
2969 else if(d< strength) d1= d;
2970 else if(d< 2*strength) d1= 2*strength - d;
2971 else d1= 0;
115329f1 2972
332f9ac4
MN
2973 p1 += d1;
2974 p2 -= d1;
2975 if(p1&256) p1= ~(p1>>31);
2976 if(p2&256) p2= ~(p2>>31);
115329f1 2977
332f9ac4
MN
2978 src[y*stride-1] = p1;
2979 src[y*stride+0] = p2;
2980
c26abfa5 2981 ad1= FFABS(d1)>>1;
115329f1 2982
f66e4f5f 2983 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 2984
332f9ac4
MN
2985 src[y*stride-2] = p0 - d2;
2986 src[y*stride+1] = p3 + d2;
2987 }
73f51a4d 2988 }
332f9ac4 2989}
1457ab52 2990
fdbbf2e0
MN
/**
 * Separable (1, 2, 1) smoothing filter applied in place to an 8x8 block.
 *
 * Pass 1 filters vertically into a temporary 8x8 buffer scaled by 4; the top
 * and bottom rows are not filtered and are just multiplied by 4.
 * Pass 2 filters horizontally back into src; the left and right columns only
 * undo the scaling of pass 1 ((v + 2) >> 2), interior samples are normalised
 * by 16 with rounding.
 *
 * @param src    top-left sample of the 8x8 block
 * @param stride row pitch of src
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8*8];
    int row, col;

    /* vertical pass */
    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row*stride;
        int *out = vert + row*8;
        const int edge_row = (row == 0 || row == 7);

        for (col = 0; col < 8; col++) {
            if (edge_row)
                out[col] = 4*line[col];
            else
                out[col] = line[col - stride] + 2*line[col] + line[col + stride];
        }
    }

    /* horizontal pass */
    for (row = 0; row < 8; row++) {
        uint8_t *line = src + row*stride;
        const int *in = vert + row*8;

        line[0] = (in[0] + 2) >> 2;
        line[7] = (in[7] + 2) >> 2;
        for (col = 1; col < 7; col++)
            line[col] = (in[col - 1] + 2*in[col] + in[col + 1] + 8) >> 4;
    }
}
3017
3f50965b 3018static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
42251a2a
LM
3019{
3020 int i, d;
3021 for( i = 0; i < 4; i++ ) {
3022 if( tc0[i] < 0 ) {
3023 pix += 4*ystride;
3024 continue;
3025 }
3026 for( d = 0; d < 4; d++ ) {
3027 const int p0 = pix[-1*xstride];
3028 const int p1 = pix[-2*xstride];
3029 const int p2 = pix[-3*xstride];
3030 const int q0 = pix[0];
3031 const int q1 = pix[1*xstride];
3032 const int q2 = pix[2*xstride];
115329f1 3033
c26abfa5
DB
3034 if( FFABS( p0 - q0 ) < alpha &&
3035 FFABS( p1 - p0 ) < beta &&
3036 FFABS( q1 - q0 ) < beta ) {
115329f1 3037
42251a2a
LM
3038 int tc = tc0[i];
3039 int i_delta;
115329f1 3040
c26abfa5 3041 if( FFABS( p2 - p0 ) < beta ) {
c9640c17 3042 if(tc0[i])
f66e4f5f 3043 pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
42251a2a
LM
3044 tc++;
3045 }
c26abfa5 3046 if( FFABS( q2 - q0 ) < beta ) {
c9640c17 3047 if(tc0[i])
f66e4f5f 3048 pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
42251a2a
LM
3049 tc++;
3050 }
115329f1 3051
f66e4f5f
RD
3052 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
3053 pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */
3054 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
42251a2a
LM
3055 }
3056 pix += ystride;
3057 }
3058 }
3059}
/** Luma deblocking with the cross-edge step equal to stride and the
 *  along-edge step equal to 1. */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride, along = 1;

    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}

/** Luma deblocking with the cross-edge step equal to 1 and the
 *  along-edge step equal to stride. */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1, along = stride;

    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}
3068
3f50965b 3069static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
712ca84c
JGG
3070{
3071 int d;
3072 for( d = 0; d < 16; d++ ) {
3073 const int p2 = pix[-3*xstride];
3074 const int p1 = pix[-2*xstride];
3075 const int p0 = pix[-1*xstride];
3076
3077 const int q0 = pix[ 0*xstride];
3078 const int q1 = pix[ 1*xstride];
3079 const int q2 = pix[ 2*xstride];
3080
3081 if( FFABS( p0 - q0 ) < alpha &&
3082 FFABS( p1 - p0 ) < beta &&
3083 FFABS( q1 - q0 ) < beta ) {
3084
3085 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
3086 if( FFABS( p2 - p0 ) < beta)
3087 {
3088 const int p3 = pix[-4*xstride];
3089 /* p0', p1', p2' */
3090 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
3091 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
3092 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
3093 } else {
3094 /* p0' */
3095 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
3096 }
3097 if( FFABS( q2 - q0 ) < beta)
3098 {
3099 const int q3 = pix[3*xstride];
3100 /* q0', q1', q2' */
3101 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
3102 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
3103 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
3104 } else {
3105 /* q0' */
3106 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
3107 }
3108 }else{
3109 /* p0', q0' */
3110 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
3111 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
3112 }
3113 }
3114 pix += ystride;
3115 }
3116}
/** Intra luma deblocking with the cross-edge step equal to stride and the
 *  along-edge step equal to 1. */
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = stride, along = 1;

    h264_loop_filter_luma_intra_c(pix, across, along, alpha, beta);
}

/** Intra luma deblocking with the cross-edge step equal to 1 and the
 *  along-edge step equal to stride. */
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = 1, along = stride;

    h264_loop_filter_luma_intra_c(pix, across, along, alpha, beta);
}
3125
3f50965b 3126static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
42251a2a
LM
3127{
3128 int i, d;
3129 for( i = 0; i < 4; i++ ) {
3130 const int tc = tc0[i];
3131 if( tc <= 0 ) {
3132 pix += 2*ystride;
3133 continue;
3134 }
3135 for( d = 0; d < 2; d++ ) {
3136 const int p0 = pix[-1*xstride];
3137 const int p1 = pix[-2*xstride];
3138 const int q0 = pix[0];
3139 const int q1 = pix[1*xstride];
3140
c26abfa5
DB
3141 if( FFABS( p0 - q0 ) < alpha &&
3142 FFABS( p1 - p0 ) < beta &&
3143 FFABS( q1 - q0 ) < beta ) {
42251a2a 3144
f66e4f5f 3145 int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
42251a2a 3146
f66e4f5f
RD
3147 pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */
3148 pix[0] = av_clip_uint8( q0 - delta ); /* q0' */
42251a2a
LM
3149 }
3150 pix += ystride;
3151 }
3152 }
3153}
/** Chroma deblocking with the cross-edge step equal to stride and the
 *  along-edge step equal to 1. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride, along = 1;

    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}

/** Chroma deblocking with the cross-edge step equal to 1 and the
 *  along-edge step equal to stride. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1, along = stride;

    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}
3162
3f50965b 3163static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
5cf08f23
LM
3164{
3165 int d;
3166 for( d = 0; d < 8; d++ ) {
3167 const int p0 = pix[-1*xstride];
3168 const int p1 = pix[-2*xstride];
3169 const int q0 = pix[0];
3170 const int q1 = pix[1*xstride];
3171
c26abfa5
DB
3172 if( FFABS( p0 - q0 ) < alpha &&
3173 FFABS( p1 - p0 ) < beta &&
3174 FFABS( q1 - q0 ) < beta ) {
5cf08f23
LM
3175
3176 pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
3177 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
3178 }
3179 pix += ystride;
3180 }
3181}
/** Intra chroma deblocking with the cross-edge step equal to stride and the
 *  along-edge step equal to 1. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = stride, along = 1;

    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}

/** Intra chroma deblocking with the cross-edge step equal to 1 and the
 *  along-edge step equal to stride. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = 1, along = stride;

    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}
3190
/**
 * Sum of absolute differences between two 16-sample-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size row pitch shared by both blocks
 * @param h         number of rows
 * @return sum over all rows and the 16 columns of |pix1 - pix2|
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3218
/**
 * Sum of absolute differences between a 16-wide block and the second block
 * averaged with its right-hand neighbour (avg2 of columns x and x+1).
 *
 * Reads 17 samples per row of pix2.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size row pitch shared by both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3246
/**
 * Sum of absolute differences between a 16-wide block and the second block
 * averaged with the row below it (avg2 of rows y and y+1).
 *
 * Reads h+1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size row pitch shared by both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
3276
/**
 * Sum of absolute differences between a 16-pixel-wide block and a reference
 * block interpolated in both directions: every reference sample is avg4() of
 * a pixel, its right-hand neighbour and the two pixels one row below those.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block; columns 0..16 of rows 0..h are read
 * @param line_size offset added to the pointers to step to the next row
 * @param h         number of rows to process
 * @return the accumulated sum over all rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        /* the row directly below the current reference row */
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3306
/**
 * Sum of absolute differences between two 8-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset added to both pointers to step to the next row
 * @param h         number of rows to process
 * @return the accumulated sum over all rows
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3326
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * block interpolated horizontally: every reference sample is avg2() of a
 * pixel and its right-hand neighbour.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block; columns 0..8 of each row are read
 * @param line_size offset added to both pointers to step to the next row
 * @param h         number of rows to process
 * @return the accumulated sum over all rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3346
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * block interpolated vertically: every reference sample is avg2() of a pixel
 * and the pixel one row (line_size) below it.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block; rows 0..h are read
 * @param line_size offset added to the pointers to step to the next row
 * @param h         number of rows to process
 * @return the accumulated sum over all rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        /* the row directly below the current reference row */
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3368
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * block interpolated in both directions: every reference sample is avg4() of
 * a pixel, its right-hand neighbour and the two pixels one row below those.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block; columns 0..8 of rows 0..h are read
 * @param line_size offset added to the pointers to step to the next row
 * @param h         number of rows to process
 * @return the accumulated sum over all rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        /* the row directly below the current reference row */
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3390
bf4e3bd2
MR
3391static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3392 MpegEncContext *c = v;
e6a2ac34
MN
3393 int score1=0;
3394 int score2=0;
3395 int x,y;
d4c5d2ad 3396
e6a2ac34
MN
3397 for(y=0; y<h; y++){
3398 for(x=0; x<16; x++){
3399 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3400 }
3401 if(y+1<h){
3402 for(x=0; x<15; x++){
c26abfa5 3403 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 3404 - s1[x+1] + s1[x+1+stride])
c26abfa5 3405 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
3406 - s2[x+1] + s2[x+1+stride]);
3407 }
3408 }
3409 s1+= stride;
3410 s2+= stride;
3411 }
d4c5d2ad 3412
c26abfa5
DB
3413 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3414 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
3415}
3416
bf4e3bd2
MR
3417static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3418 MpegEncContext *c = v;
e6a2ac34
MN
3419 int score1=0;
3420 int score2=0;
3421 int x,y;
115329f1 3422
e6a2ac34
MN
3423 for(y=0; y<h; y++){
3424 for(x=0; x<8; x++){
3425 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3426 }
3427 if(y+1<h){
3428 for(x=0; x<7; x++){
c26abfa5 3429 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 3430 - s1[x+1] + s1[x+1+stride])
c26abfa5 3431 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
3432 - s2[x+1] + s2[x+1+stride]);
3433 }
3434 }
3435 s1+= stride;
3436 s2+= stride;
3437 }
115329f1 3438
c26abfa5
DB
3439 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3440 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
3441}
3442
364a1797
MN
3443static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
3444 int i;
3445 unsigned int sum=0;
3446
3447 for(i=0; i<8*8; i++){
3448 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
3449 int w= weight[i];
3450 b>>= RECON_SHIFT;
3451 assert(-512<b && b<512);
3452
3453 sum += (w*b)*(w*b)>>4;
3454 }
3455 return sum>>2;
3456}
3457
3458static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
3459 int i;
3460
3461 for(i=0; i<8*8; i++){
3462 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115329f1 3463 }
364a1797
MN
3464}
3465
a9badb51
MN
3466/**
3467 * permutes an 8x8 block.
2a5700de 3468 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
3469 * @param permutation the permutation vector
3470 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
115329f1 3471 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2a5700de 3472 * (inverse) permutated to scantable order!
a9badb51 3473 */
0c1a9eda 3474void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 3475{
7801d21d 3476 int i;
477ab036 3477 DCTELEM temp[64];
115329f1 3478
7801d21d 3479 if(last<=0) return;
90b5b51e 3480 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
d962f6fd 3481
7801d21d
MN
3482 for(i=0; i<=last; i++){
3483 const int j= scantable[i];
3484 temp[j]= block[j];
3485 block[j]=0;
3486 }
115329f1 3487
7801d21d
MN
3488 for(i=0; i<=last; i++){
3489 const int j= scantable[i];
3490 const int perm_j= permutation[j];
3491 block[perm_j]= temp[j];
3492 }
d962f6fd 3493}
e0eac44e 3494
622348f9
MN
/**
 * Comparison function that ignores all of its arguments and always
 * returns a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}
3498
3499void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
3500 int i;
115329f1 3501
3899eb2f 3502 memset(cmp, 0, sizeof(void*)*6);
115329f1 3503
3899eb2f 3504 for(i=0; i<6; i++){
622348f9
MN
3505 switch(type&0xFF){
3506 case FF_CMP_SAD:
3507 cmp[i]= c->sad[i];
3508 break;
3509 case FF_CMP_SATD:
3510 cmp[i]= c->hadamard8_diff[i];
3511 break;
3512 case FF_CMP_SSE:
3513 cmp[i]= c->sse[i];
3514 break;
3515 case FF_CMP_DCT:
3516 cmp[i]= c->dct_sad[i];
3517 break;
27c61ac5
MN
3518 case FF_CMP_DCT264:
3519 cmp[i]= c->dct264_sad[i];
3520 break;
0fd6aea1
MN
3521 case FF_CMP_DCTMAX:
3522 cmp[i]= c->dct_max[i];
3523 break;
622348f9
MN
3524 case FF_CMP_PSNR:
3525 cmp[i]= c->quant_psnr[i];
3526 break;
3527 case FF_CMP_BIT:
3528 cmp[i]= c->bit[i];
3529 break;
3530 case FF_CMP_RD:
3531 cmp[i]= c->rd[i];
3532 break;
3533 case FF_CMP_VSAD:
3534 cmp[i]= c->vsad[i];
3535 break;
3536 case FF_CMP_VSSE:
3537 cmp[i]= c->vsse[i];
3538 break;
3539 case FF_CMP_ZERO:
3540 cmp[i]= zero_cmp;
3541 break;
e6a2ac34
MN
3542 case FF_CMP_NSSE:
3543 cmp[i]= c->nsse[i];
3544 break;
b250f9c6 3545#if CONFIG_SNOW_ENCODER
26efc54e
MN
3546 case FF_CMP_W53:
3547 cmp[i]= c->w53[i];
3548 break;
3549 case FF_CMP_W97:
3550 cmp[i]= c->w97[i];
3551 break;
3a6fc8fa 3552#endif
622348f9
MN
3553 default:
3554 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
3555 }
3556 }
3557}
3558
5fecfb7d
LM
3559static void clear_block_c(DCTELEM *block)