Move ff_spatial_dwt() prototype to snow.h
[libav.git] / libavcodec / dsputil.c
CommitLineData
de6d9b64
FB
1/*
2 * DSP utils
406792e7 3 * Copyright (c) 2000, 2001 Fabrice Bellard
8f2ab833 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
de6d9b64 5 *
7b94177e
DB
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7 *
b78e7197
DB
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
ff4ec49e
FB
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
b78e7197 13 * version 2.1 of the License, or (at your option) any later version.
de6d9b64 14 *
b78e7197 15 * FFmpeg is distributed in the hope that it will be useful,
de6d9b64 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ff4ec49e
FB
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
de6d9b64 19 *
ff4ec49e 20 * You should have received a copy of the GNU Lesser General Public
b78e7197 21 * License along with FFmpeg; if not, write to the Free Software
5509bffa 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
de6d9b64 23 */
115329f1 24
983e3246 25/**
bad5537e 26 * @file libavcodec/dsputil.c
983e3246
MN
27 * DSP utils
28 */
115329f1 29
de6d9b64
FB
30#include "avcodec.h"
31#include "dsputil.h"
b0368839 32#include "simple_idct.h"
65e4c8c9 33#include "faandct.h"
6f08c541 34#include "faanidct.h"
199436b9 35#include "mathops.h"
059715a4 36#include "snow.h"
af818f7a
DB
37#include "mpegvideo.h"
38#include "config.h"
3da11804
MR
39#include "lpc.h"
40#include "ac3dec.h"
41#include "vorbis.h"
42#include "png.h"
5596c60c 43
55fde95e 44uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
1d503957 45uint32_t ff_squareTbl[512] = {0, };
de6d9b64 46
917f55cc
LM
47// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
48#define pb_7f (~0UL/255 * 0x7f)
49#define pb_80 (~0UL/255 * 0x80)
469bd7b1 50
0c1a9eda 51const uint8_t ff_zigzag_direct[64] = {
2ad1516a
MN
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
e0eac44e 54 12, 19, 26, 33, 40, 48, 41, 34,
2ad1516a 55 27, 20, 13, 6, 7, 14, 21, 28,
e0eac44e
FB
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
60};
61
10acc479
RS
62/* Specific zigzag scan for 248 idct. NOTE that unlike the
63 specification, we interleave the fields */
64const uint8_t ff_zigzag248_direct[64] = {
65 0, 8, 1, 9, 16, 24, 2, 10,
66 17, 25, 32, 40, 48, 56, 33, 41,
67 18, 26, 3, 11, 4, 12, 19, 27,
68 34, 42, 49, 57, 50, 58, 35, 43,
69 20, 28, 5, 13, 6, 14, 21, 29,
70 36, 44, 51, 59, 52, 60, 37, 45,
71 22, 30, 7, 15, 23, 31, 38, 46,
72 53, 61, 54, 62, 39, 47, 55, 63,
73};
74
2f349de2 75/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84dc2d8a 76DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
2f349de2 77
0c1a9eda 78const uint8_t ff_alternate_horizontal_scan[64] = {
115329f1 79 0, 1, 2, 3, 8, 9, 16, 17,
e0eac44e 80 10, 11, 4, 5, 6, 7, 15, 14,
115329f1 81 13, 12, 19, 18, 24, 25, 32, 33,
e0eac44e 82 26, 27, 20, 21, 22, 23, 28, 29,
115329f1 83 30, 31, 34, 35, 40, 41, 48, 49,
e0eac44e 84 42, 43, 36, 37, 38, 39, 44, 45,
115329f1 85 46, 47, 50, 51, 56, 57, 58, 59,
e0eac44e
FB
86 52, 53, 54, 55, 60, 61, 62, 63,
87};
88
0c1a9eda 89const uint8_t ff_alternate_vertical_scan[64] = {
115329f1 90 0, 8, 16, 24, 1, 9, 2, 10,
e0eac44e 91 17, 25, 32, 40, 48, 56, 57, 49,
115329f1 92 41, 33, 26, 18, 3, 11, 4, 12,
e0eac44e 93 19, 27, 34, 42, 50, 58, 35, 43,
115329f1 94 51, 59, 20, 28, 5, 13, 6, 14,
e0eac44e 95 21, 29, 36, 44, 52, 60, 37, 45,
115329f1 96 53, 61, 22, 30, 7, 15, 23, 31,
e0eac44e
FB
97 38, 46, 54, 62, 39, 47, 55, 63,
98};
99
1a918c08
LM
100/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
101 * for a>16909558, is an overestimate by less than 1 part in 1<<24 */
102const uint32_t ff_inverse[257]={
115329f1
DB
103 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
104 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
105 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
106 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
107 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
108 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
109 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
110 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
111 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
112 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
113 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
114 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
115 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
116 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
117 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
118 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
119 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
120 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
121 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
122 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
123 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
124 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
125 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
126 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
127 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
128 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
129 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
130 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
131 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
132 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
133 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
2f349de2 134 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
1a918c08 135 16777216
2f349de2
MN
136};
137
b0368839
MN
138/* Input permutation for the simple_idct_mmx */
139static const uint8_t simple_mmx_permutation[64]={
bb270c08
DB
140 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
141 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
142 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
143 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
144 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
145 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
146 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
147 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
b0368839
MN
148};
149
0e956ba2
AS
150static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
151
4c79b95c
AJ
152void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
153 int i;
154 int end;
155
156 st->scantable= src_scantable;
157
158 for(i=0; i<64; i++){
159 int j;
160 j = src_scantable[i];
161 st->permutated[i] = permutation[j];
b250f9c6 162#if ARCH_PPC
4c79b95c
AJ
163 st->inverse[j] = i;
164#endif
165 }
166
167 end=-1;
168 for(i=0; i<64; i++){
169 int j;
170 j = st->permutated[i];
171 if(j>end) end=j;
172 st->raster_end[i]= end;
173 }
174}
175
/**
 * Sum all samples of a 16x16 block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between two vertically adjacent samples
 * @return sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}
197
0c1a9eda 198static int pix_norm1_c(uint8_t * pix, int line_size)
3aa102be
MN
199{
200 int s, i, j;
1d503957 201 uint32_t *sq = ff_squareTbl + 256;
3aa102be
MN
202
203 s = 0;
204 for (i = 0; i < 16; i++) {
bb270c08 205 for (j = 0; j < 16; j += 8) {
2a006cd3 206#if 0
bb270c08
DB
207 s += sq[pix[0]];
208 s += sq[pix[1]];
209 s += sq[pix[2]];
210 s += sq[pix[3]];
211 s += sq[pix[4]];
212 s += sq[pix[5]];
213 s += sq[pix[6]];
214 s += sq[pix[7]];
2a006cd3
FL
215#else
216#if LONG_MAX > 2147483647
bb270c08
DB
217 register uint64_t x=*(uint64_t*)pix;
218 s += sq[x&0xff];
219 s += sq[(x>>8)&0xff];
220 s += sq[(x>>16)&0xff];
221 s += sq[(x>>24)&0xff];
2a006cd3
FL
222 s += sq[(x>>32)&0xff];
223 s += sq[(x>>40)&0xff];
224 s += sq[(x>>48)&0xff];
225 s += sq[(x>>56)&0xff];
226#else
bb270c08
DB
227 register uint32_t x=*(uint32_t*)pix;
228 s += sq[x&0xff];
229 s += sq[(x>>8)&0xff];
230 s += sq[(x>>16)&0xff];
231 s += sq[(x>>24)&0xff];
2a006cd3
FL
232 x=*(uint32_t*)(pix+4);
233 s += sq[x&0xff];
234 s += sq[(x>>8)&0xff];
235 s += sq[(x>>16)&0xff];
236 s += sq[(x>>24)&0xff];
237#endif
238#endif
bb270c08
DB
239 pix += 8;
240 }
241 pix += line_size - 16;
3aa102be
MN
242 }
243 return s;
244}
245
/**
 * Apply bswap_32() to every element of a buffer of 32-bit words.
 *
 * @param dst destination, w words
 * @param src source, w words
 * @param w   number of 32-bit words to convert; nothing is done for w <= 0
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for (i = 0; i < w; i++)
        dst[i] = bswap_32(src[i]);
}
3aa102be 263
26efc54e
MN
264static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
265{
266 int s, i;
1d503957 267 uint32_t *sq = ff_squareTbl + 256;
26efc54e
MN
268
269 s = 0;
270 for (i = 0; i < h; i++) {
271 s += sq[pix1[0] - pix2[0]];
272 s += sq[pix1[1] - pix2[1]];
273 s += sq[pix1[2] - pix2[2]];
274 s += sq[pix1[3] - pix2[3]];
275 pix1 += line_size;
276 pix2 += line_size;
277 }
278 return s;
279}
280
bb198e19 281static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
1457ab52
MN
282{
283 int s, i;
1d503957 284 uint32_t *sq = ff_squareTbl + 256;
1457ab52
MN
285
286 s = 0;
bb198e19 287 for (i = 0; i < h; i++) {
1457ab52
MN
288 s += sq[pix1[0] - pix2[0]];
289 s += sq[pix1[1] - pix2[1]];
290 s += sq[pix1[2] - pix2[2]];
291 s += sq[pix1[3] - pix2[3]];
292 s += sq[pix1[4] - pix2[4]];
293 s += sq[pix1[5] - pix2[5]];
294 s += sq[pix1[6] - pix2[6]];
295 s += sq[pix1[7] - pix2[7]];
296 pix1 += line_size;
297 pix2 += line_size;
298 }
299 return s;
300}
301
bb198e19 302static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
9c76bd48 303{
6b026927 304 int s, i;
1d503957 305 uint32_t *sq = ff_squareTbl + 256;
9c76bd48
BF
306
307 s = 0;
bb198e19 308 for (i = 0; i < h; i++) {
6b026927
FH
309 s += sq[pix1[ 0] - pix2[ 0]];
310 s += sq[pix1[ 1] - pix2[ 1]];
311 s += sq[pix1[ 2] - pix2[ 2]];
312 s += sq[pix1[ 3] - pix2[ 3]];
313 s += sq[pix1[ 4] - pix2[ 4]];
314 s += sq[pix1[ 5] - pix2[ 5]];
315 s += sq[pix1[ 6] - pix2[ 6]];
316 s += sq[pix1[ 7] - pix2[ 7]];
317 s += sq[pix1[ 8] - pix2[ 8]];
318 s += sq[pix1[ 9] - pix2[ 9]];
319 s += sq[pix1[10] - pix2[10]];
320 s += sq[pix1[11] - pix2[11]];
321 s += sq[pix1[12] - pix2[12]];
322 s += sq[pix1[13] - pix2[13]];
323 s += sq[pix1[14] - pix2[14]];
324 s += sq[pix1[15] - pix2[15]];
2a006cd3 325
6b026927
FH
326 pix1 += line_size;
327 pix2 += line_size;
9c76bd48
BF
328 }
329 return s;
330}
331
26efc54e 332
b250f9c6 333#if CONFIG_SNOW_ENCODER //dwt is in snow.c
/**
 * Wavelet-domain comparison of two square blocks.
 *
 * The sample differences, scaled by 16, are transformed in place with
 * ff_spatial_dwt(); the absolute value of every resulting coefficient is
 * weighted with a per-subband factor, summed, and the total is returned
 * divided by 512.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, shared by both blocks
 * @param w         block width; 8 selects 3 decomposition levels, anything
 *                  else 4. The callers in this file pass 8, 16 or 32.
 * @param h         block height, must equal w (asserted)
 * @param type      wavelet passed to ff_spatial_dwt(): 0 selects the 9/7
 *                  weights, 1 the 5/3 weights
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];     /* work buffer, row stride 32 */
    int level, ori;
    /* weights indexed [type][dec_count-3][level][orientation] */
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            /* "* 16" rather than "<< 4": the difference may be negative and
             * left-shifting a negative value is undefined in C */
            tmp[32*i+j] = (pix1[j] - pix2[j]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    assert(w==h);
    for(level=0; level<dec_count; level++){
        /* orientation 0 is only visited at level 0 */
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int coef= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(coef);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
402
/* 8-wide block comparison via w_c() with wavelet type 1 (5/3 weights). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}
406
/* 8-wide block comparison via w_c() with wavelet type 0 (9/7 weights). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}
410
/* 16-wide block comparison via w_c() with wavelet type 1 (5/3 weights). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}
414
/* 16-wide block comparison via w_c() with wavelet type 0 (9/7 weights). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}
418
/* 32-wide block comparison via w_c() with wavelet type 1 (5/3 weights).
 * Not static: has external linkage. */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}
422
/* 32-wide block comparison via w_c() with wavelet type 0 (9/7 weights).
 * Not static: has external linkage. */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
3a6fc8fa 426#endif
871371a7 427
5a6a9e78
AJ
/**
 * Draw the edges of width 'w' of an image of size width, height:
 * every sample in the border takes the value of the nearest image sample.
 *
 * @param buf    top-left sample of the image; the w samples/rows surrounding
 *               the image on every side are written
 * @param wrap   distance in bytes between two rows; must be at least
 *               width + 2*w so that padded rows do not overlap
 * @param width  image width in samples
 * @param height image height in rows
 * @param w      border width
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row;
    uint8_t *last_line = buf + (height - 1) * wrap;
    int i;

    /* left and right of every image row */
    row = buf;
    for (i = 0; i < height; i++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* top and bottom: replicate the already padded first and last rows, which
     * fills the four corners with the corner samples at the same time */
    for (i = 1; i <= w; i++) {
        memcpy(buf       - w - i * wrap, buf       - w, width + 2 * w);
        memcpy(last_line - w + i * wrap, last_line - w, width + 2 * w);
    }
}
456
288a44fb
AJ
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * Samples of the block that lie outside the source picture are filled with the
 * nearest sample that lies inside it.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* A block lying completely outside the picture is moved so that it
     * overlaps the picture by exactly one row/column; src follows the move. */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* [start, end) is the part of the block that lies inside the picture */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        const uint8_t *src_row= src + y*linesize;
        uint8_t *dst_row= buf + y*linesize;
        for(x=start_x; x<end_x; x++){
            dst_row[x]= src_row[x];
        }
    }

    //top: repeat the first valid row upwards
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom: repeat the last valid row downwards
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* every row now holds valid samples in [start_x, end_x); extend sideways */
    for(y=0; y<block_h; y++){
        uint8_t *dst_row= buf + y*linesize;

        //left
        for(x=0; x<start_x; x++){
            dst_row[x]= dst_row[start_x];
        }

        //right
        for(x=end_x; x<block_w; x++){
            dst_row[x]= dst_row[end_x - 1];
        }
    }
}
527
0c1a9eda 528static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
de6d9b64 529{
de6d9b64
FB
530 int i;
531
532 /* read the pixels */
de6d9b64 533 for(i=0;i<8;i++) {
c13e1abd
FH
534 block[0] = pixels[0];
535 block[1] = pixels[1];
536 block[2] = pixels[2];
537 block[3] = pixels[3];
538 block[4] = pixels[4];
539 block[5] = pixels[5];
540 block[6] = pixels[6];
541 block[7] = pixels[7];
542 pixels += line_size;
543 block += 8;
de6d9b64
FB
544 }
545}
546
0c1a9eda 547static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
bb270c08 548 const uint8_t *s2, int stride){
9dbcbd92
MN
549 int i;
550
551 /* read the pixels */
9dbcbd92 552 for(i=0;i<8;i++) {
c13e1abd
FH
553 block[0] = s1[0] - s2[0];
554 block[1] = s1[1] - s2[1];
555 block[2] = s1[2] - s2[2];
556 block[3] = s1[3] - s2[3];
557 block[4] = s1[4] - s2[4];
558 block[5] = s1[5] - s2[5];
559 block[6] = s1[6] - s2[6];
560 block[7] = s1[7] - s2[7];
9dbcbd92
MN
561 s1 += stride;
562 s2 += stride;
c13e1abd 563 block += 8;
9dbcbd92
MN
564 }
565}
566
567
0c1a9eda 568static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 569 int line_size)
de6d9b64 570{
de6d9b64 571 int i;
55fde95e 572 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 573
de6d9b64 574 /* read the pixels */
de6d9b64 575 for(i=0;i<8;i++) {
c13e1abd
FH
576 pixels[0] = cm[block[0]];
577 pixels[1] = cm[block[1]];
578 pixels[2] = cm[block[2]];
579 pixels[3] = cm[block[3]];
580 pixels[4] = cm[block[4]];
581 pixels[5] = cm[block[5]];
582 pixels[6] = cm[block[6]];
583 pixels[7] = cm[block[7]];
584
585 pixels += line_size;
586 block += 8;
de6d9b64
FB
587 }
588}
589
178fcca8 590static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 591 int line_size)
178fcca8
MN
592{
593 int i;
55fde95e 594 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 595
178fcca8
MN
596 /* read the pixels */
597 for(i=0;i<4;i++) {
598 pixels[0] = cm[block[0]];
599 pixels[1] = cm[block[1]];
600 pixels[2] = cm[block[2]];
601 pixels[3] = cm[block[3]];
602
603 pixels += line_size;
604 block += 8;
605 }
606}
607
9ca358b9 608static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
bb270c08 609 int line_size)
9ca358b9
MN
610{
611 int i;
55fde95e 612 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 613
9ca358b9
MN
614 /* read the pixels */
615 for(i=0;i<2;i++) {
616 pixels[0] = cm[block[0]];
617 pixels[1] = cm[block[1]];
618
619 pixels += line_size;
620 block += 8;
621 }
622}
623
115329f1 624static void put_signed_pixels_clamped_c(const DCTELEM *block,
f9ed9d85
MM
625 uint8_t *restrict pixels,
626 int line_size)
627{
628 int i, j;
629
630 for (i = 0; i < 8; i++) {
631 for (j = 0; j < 8; j++) {
632 if (*block < -128)
633 *pixels = 0;
634 else if (*block > 127)
635 *pixels = 255;
636 else
637 *pixels = (uint8_t)(*block + 128);
638 block++;
639 pixels++;
640 }
641 pixels += (line_size - 8);
642 }
643}
644
342c7dfd
KS
645static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
646 int line_size)
647{
648 int i;
649
650 /* read the pixels */
651 for(i=0;i<8;i++) {
652 pixels[0] = block[0];
653 pixels[1] = block[1];
654 pixels[2] = block[2];
655 pixels[3] = block[3];
656 pixels[4] = block[4];
657 pixels[5] = block[5];
658 pixels[6] = block[6];
659 pixels[7] = block[7];
660
661 pixels += line_size;
662 block += 8;
663 }
664}
665
0c1a9eda 666static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
c13e1abd 667 int line_size)
de6d9b64 668{
de6d9b64 669 int i;
55fde95e 670 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 671
de6d9b64 672 /* read the pixels */
de6d9b64 673 for(i=0;i<8;i++) {
c13e1abd
FH
674 pixels[0] = cm[pixels[0] + block[0]];
675 pixels[1] = cm[pixels[1] + block[1]];
676 pixels[2] = cm[pixels[2] + block[2]];
677 pixels[3] = cm[pixels[3] + block[3]];
678 pixels[4] = cm[pixels[4] + block[4]];
679 pixels[5] = cm[pixels[5] + block[5]];
680 pixels[6] = cm[pixels[6] + block[6]];
681 pixels[7] = cm[pixels[7] + block[7]];
682 pixels += line_size;
683 block += 8;
de6d9b64
FB
684 }
685}
178fcca8
MN
686
687static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
688 int line_size)
689{
690 int i;
55fde95e 691 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 692
178fcca8
MN
693 /* read the pixels */
694 for(i=0;i<4;i++) {
695 pixels[0] = cm[pixels[0] + block[0]];
696 pixels[1] = cm[pixels[1] + block[1]];
697 pixels[2] = cm[pixels[2] + block[2]];
698 pixels[3] = cm[pixels[3] + block[3]];
699 pixels += line_size;
700 block += 8;
701 }
702}
9ca358b9
MN
703
704static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
705 int line_size)
706{
707 int i;
55fde95e 708 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
115329f1 709
9ca358b9
MN
710 /* read the pixels */
711 for(i=0;i<2;i++) {
712 pixels[0] = cm[pixels[0] + block[0]];
713 pixels[1] = cm[pixels[1] + block[1]];
714 pixels += line_size;
715 block += 8;
716 }
717}
36940eca
LM
718
719static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
720{
721 int i;
722 for(i=0;i<8;i++) {
723 pixels[0] += block[0];
724 pixels[1] += block[1];
725 pixels[2] += block[2];
726 pixels[3] += block[3];
727 pixels[4] += block[4];
728 pixels[5] += block[5];
729 pixels[6] += block[6];
730 pixels[7] += block[7];
731 pixels += line_size;
732 block += 8;
733 }
734}
735
736static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
737{
738 int i;
739 for(i=0;i<4;i++) {
740 pixels[0] += block[0];
741 pixels[1] += block[1];
742 pixels[2] += block[2];
743 pixels[3] += block[3];
744 pixels += line_size;
745 block += 4;
746 }
747}
748
1edbfe19
LM
749static int sum_abs_dctelem_c(DCTELEM *block)
750{
751 int sum=0, i;
752 for(i=0; i<64; i++)
753 sum+= FFABS(block[i]);
754 return sum;
755}
756
342c7dfd
KS
/**
 * Set a block of 16 samples by h rows to a constant value.
 *
 * @param block     top-left sample of the block
 * @param value     value written to every sample
 * @param line_size distance in bytes between two rows
 * @param h         number of rows; nothing is written for h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 16);
        row += line_size;
    }
}
766
/**
 * Set a block of 8 samples by h rows to a constant value.
 *
 * @param block     top-left sample of the block
 * @param value     value written to every sample
 * @param line_size distance in bytes between two rows
 * @param h         number of rows; nothing is written for h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 8);
        row += line_size;
    }
}
776
/**
 * Upscale an 8x8 block to 16x16 by sample doubling: every source sample is
 * written to a 2x2 square of the destination.
 *
 * The destination is written byte by byte. The previous code stored
 * src * 0x0101 through uint16_t pointers derived from dst without a cast;
 * the bytes produced are the same, but this form needs no pointer
 * conversion and has no alignment requirement.
 *
 * @param src      64 consecutive source samples (row stride 8)
 * @param dst      top-left sample of the 16x16 destination
 * @param linesize distance in bytes between two destination rows
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;

    for (j = 0; j < 8; j++) {
        uint8_t *row0 = dst;             /* destination row 2*j     */
        uint8_t *row1 = dst + linesize;  /* destination row 2*j + 1 */

        for (i = 0; i < 8; i++) {
            const uint8_t p = src[i];

            row0[2*i] = row0[2*i + 1] = p;
            row1[2*i] = row1[2*i + 1] = p;
        }
        src += 8;
        dst += 2 * linesize;
    }
}
792
59fe111e
MN
793#if 0
794
795#define PIXOP2(OPNAME, OP) \
b3184779 796static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
797{\
798 int i;\
799 for(i=0; i<h; i++){\
905694d9 800 OP(*((uint64_t*)block), AV_RN64(pixels));\
59fe111e
MN
801 pixels+=line_size;\
802 block +=line_size;\
803 }\
804}\
805\
45553457 806static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
807{\
808 int i;\
809 for(i=0; i<h; i++){\
905694d9
RS
810 const uint64_t a= AV_RN64(pixels );\
811 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
812 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
813 pixels+=line_size;\
814 block +=line_size;\
815 }\
816}\
817\
45553457 818static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
819{\
820 int i;\
821 for(i=0; i<h; i++){\
905694d9
RS
822 const uint64_t a= AV_RN64(pixels );\
823 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
824 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
825 pixels+=line_size;\
826 block +=line_size;\
827 }\
828}\
829\
45553457 830static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
831{\
832 int i;\
833 for(i=0; i<h; i++){\
905694d9
RS
834 const uint64_t a= AV_RN64(pixels );\
835 const uint64_t b= AV_RN64(pixels+line_size);\
59fe111e
MN
836 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
837 pixels+=line_size;\
838 block +=line_size;\
839 }\
840}\
841\
45553457 842static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
843{\
844 int i;\
845 for(i=0; i<h; i++){\
905694d9
RS
846 const uint64_t a= AV_RN64(pixels );\
847 const uint64_t b= AV_RN64(pixels+line_size);\
59fe111e
MN
848 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
849 pixels+=line_size;\
850 block +=line_size;\
851 }\
852}\
853\
45553457 854static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
855{\
856 int i;\
905694d9
RS
857 const uint64_t a= AV_RN64(pixels );\
858 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
859 uint64_t l0= (a&0x0303030303030303ULL)\
860 + (b&0x0303030303030303ULL)\
861 + 0x0202020202020202ULL;\
862 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
863 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
864 uint64_t l1,h1;\
865\
866 pixels+=line_size;\
867 for(i=0; i<h; i+=2){\
905694d9
RS
868 uint64_t a= AV_RN64(pixels );\
869 uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
870 l1= (a&0x0303030303030303ULL)\
871 + (b&0x0303030303030303ULL);\
872 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
873 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
874 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
875 pixels+=line_size;\
876 block +=line_size;\
905694d9
RS
877 a= AV_RN64(pixels );\
878 b= AV_RN64(pixels+1);\
59fe111e
MN
879 l0= (a&0x0303030303030303ULL)\
880 + (b&0x0303030303030303ULL)\
881 + 0x0202020202020202ULL;\
882 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
883 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
884 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
885 pixels+=line_size;\
886 block +=line_size;\
887 }\
888}\
889\
45553457 890static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
891{\
892 int i;\
905694d9
RS
893 const uint64_t a= AV_RN64(pixels );\
894 const uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
895 uint64_t l0= (a&0x0303030303030303ULL)\
896 + (b&0x0303030303030303ULL)\
897 + 0x0101010101010101ULL;\
898 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
899 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
900 uint64_t l1,h1;\
901\
902 pixels+=line_size;\
903 for(i=0; i<h; i+=2){\
905694d9
RS
904 uint64_t a= AV_RN64(pixels );\
905 uint64_t b= AV_RN64(pixels+1);\
59fe111e
MN
906 l1= (a&0x0303030303030303ULL)\
907 + (b&0x0303030303030303ULL);\
908 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
909 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
910 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
911 pixels+=line_size;\
912 block +=line_size;\
905694d9
RS
913 a= AV_RN64(pixels );\
914 b= AV_RN64(pixels+1);\
59fe111e
MN
915 l0= (a&0x0303030303030303ULL)\
916 + (b&0x0303030303030303ULL)\
917 + 0x0101010101010101ULL;\
918 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
919 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
920 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
921 pixels+=line_size;\
922 block +=line_size;\
923 }\
924}\
925\
45553457
ZK
926CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
927CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
928CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
929CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
930CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
931CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
932CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
59fe111e
MN
933
934#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
935#else // 64 bit variant
936
937#define PIXOP2(OPNAME, OP) \
669ac79c
MN
938static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
939 int i;\
940 for(i=0; i<h; i++){\
905694d9 941 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
669ac79c
MN
942 pixels+=line_size;\
943 block +=line_size;\
944 }\
945}\
0da71265
MN
946static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
947 int i;\
948 for(i=0; i<h; i++){\
905694d9 949 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
0da71265
MN
950 pixels+=line_size;\
951 block +=line_size;\
952 }\
953}\
45553457 954static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
59fe111e
MN
955 int i;\
956 for(i=0; i<h; i++){\
905694d9
RS
957 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
958 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
59fe111e
MN
959 pixels+=line_size;\
960 block +=line_size;\
961 }\
962}\
45553457
ZK
963static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
964 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
b3184779 965}\
59fe111e 966\
b3184779
MN
967static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
968 int src_stride1, int src_stride2, int h){\
59fe111e
MN
969 int i;\
970 for(i=0; i<h; i++){\
b3184779 971 uint32_t a,b;\
905694d9
RS
972 a= AV_RN32(&src1[i*src_stride1 ]);\
973 b= AV_RN32(&src2[i*src_stride2 ]);\
d8085ea7 974 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
905694d9
RS
975 a= AV_RN32(&src1[i*src_stride1+4]);\
976 b= AV_RN32(&src2[i*src_stride2+4]);\
d8085ea7 977 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
59fe111e
MN
978 }\
979}\
980\
b3184779
MN
981static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
982 int src_stride1, int src_stride2, int h){\
59fe111e
MN
983 int i;\
984 for(i=0; i<h; i++){\
b3184779 985 uint32_t a,b;\
905694d9
RS
986 a= AV_RN32(&src1[i*src_stride1 ]);\
987 b= AV_RN32(&src2[i*src_stride2 ]);\
d8085ea7 988 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
905694d9
RS
989 a= AV_RN32(&src1[i*src_stride1+4]);\
990 b= AV_RN32(&src2[i*src_stride2+4]);\
d8085ea7 991 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
59fe111e
MN
992 }\
993}\
994\
0da71265
MN
995static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
996 int src_stride1, int src_stride2, int h){\
997 int i;\
998 for(i=0; i<h; i++){\
999 uint32_t a,b;\
905694d9
RS
1000 a= AV_RN32(&src1[i*src_stride1 ]);\
1001 b= AV_RN32(&src2[i*src_stride2 ]);\
d8085ea7 1002 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
0da71265
MN
1003 }\
1004}\
1005\
669ac79c
MN
1006static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
1007 int src_stride1, int src_stride2, int h){\
1008 int i;\
1009 for(i=0; i<h; i++){\
1010 uint32_t a,b;\
905694d9
RS
1011 a= AV_RN16(&src1[i*src_stride1 ]);\
1012 b= AV_RN16(&src2[i*src_stride2 ]);\
669ac79c
MN
1013 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
1014 }\
1015}\
1016\
b3184779
MN
1017static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
1018 int src_stride1, int src_stride2, int h){\
1019 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
1020 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
1021}\
1022\
1023static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
1024 int src_stride1, int src_stride2, int h){\
1025 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
1026 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
1027}\
1028\
45553457 1029static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
1030 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1031}\
1032\
45553457 1033static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
1034 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1035}\
1036\
45553457 1037static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
1038 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1039}\
1040\
45553457 1041static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b3184779
MN
1042 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1043}\
1044\
1045static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1046 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
1047 int i;\
1048 for(i=0; i<h; i++){\
b3184779 1049 uint32_t a, b, c, d, l0, l1, h0, h1;\
905694d9
RS
1050 a= AV_RN32(&src1[i*src_stride1]);\
1051 b= AV_RN32(&src2[i*src_stride2]);\
1052 c= AV_RN32(&src3[i*src_stride3]);\
1053 d= AV_RN32(&src4[i*src_stride4]);\
b3184779
MN
1054 l0= (a&0x03030303UL)\
1055 + (b&0x03030303UL)\
1056 + 0x02020202UL;\
1057 h0= ((a&0xFCFCFCFCUL)>>2)\
1058 + ((b&0xFCFCFCFCUL)>>2);\
1059 l1= (c&0x03030303UL)\
1060 + (d&0x03030303UL);\
1061 h1= ((c&0xFCFCFCFCUL)>>2)\
1062 + ((d&0xFCFCFCFCUL)>>2);\
1063 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
905694d9
RS
1064 a= AV_RN32(&src1[i*src_stride1+4]);\
1065 b= AV_RN32(&src2[i*src_stride2+4]);\
1066 c= AV_RN32(&src3[i*src_stride3+4]);\
1067 d= AV_RN32(&src4[i*src_stride4+4]);\
b3184779
MN
1068 l0= (a&0x03030303UL)\
1069 + (b&0x03030303UL)\
1070 + 0x02020202UL;\
1071 h0= ((a&0xFCFCFCFCUL)>>2)\
1072 + ((b&0xFCFCFCFCUL)>>2);\
1073 l1= (c&0x03030303UL)\
1074 + (d&0x03030303UL);\
1075 h1= ((c&0xFCFCFCFCUL)>>2)\
1076 + ((d&0xFCFCFCFCUL)>>2);\
1077 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
1078 }\
1079}\
669ac79c
MN
1080\
1081static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1082 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1083}\
1084\
1085static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1086 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1087}\
1088\
1089static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1090 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1091}\
1092\
1093static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1094 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1095}\
1096\
b3184779
MN
1097static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1098 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
59fe111e
MN
1099 int i;\
1100 for(i=0; i<h; i++){\
b3184779 1101 uint32_t a, b, c, d, l0, l1, h0, h1;\
905694d9
RS
1102 a= AV_RN32(&src1[i*src_stride1]);\
1103 b= AV_RN32(&src2[i*src_stride2]);\
1104 c= AV_RN32(&src3[i*src_stride3]);\
1105 d= AV_RN32(&src4[i*src_stride4]);\
b3184779
MN
1106 l0= (a&0x03030303UL)\
1107 + (b&0x03030303UL)\
1108 + 0x01010101UL;\
1109 h0= ((a&0xFCFCFCFCUL)>>2)\
1110 + ((b&0xFCFCFCFCUL)>>2);\
1111 l1= (c&0x03030303UL)\
1112 + (d&0x03030303UL);\
1113 h1= ((c&0xFCFCFCFCUL)>>2)\
1114 + ((d&0xFCFCFCFCUL)>>2);\
1115 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
905694d9
RS
1116 a= AV_RN32(&src1[i*src_stride1+4]);\
1117 b= AV_RN32(&src2[i*src_stride2+4]);\
1118 c= AV_RN32(&src3[i*src_stride3+4]);\
1119 d= AV_RN32(&src4[i*src_stride4+4]);\
b3184779
MN
1120 l0= (a&0x03030303UL)\
1121 + (b&0x03030303UL)\
1122 + 0x01010101UL;\
1123 h0= ((a&0xFCFCFCFCUL)>>2)\
1124 + ((b&0xFCFCFCFCUL)>>2);\
1125 l1= (c&0x03030303UL)\
1126 + (d&0x03030303UL);\
1127 h1= ((c&0xFCFCFCFCUL)>>2)\
1128 + ((d&0xFCFCFCFCUL)>>2);\
1129 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
59fe111e
MN
1130 }\
1131}\
b3184779
MN
1132static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1133 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1134 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1135 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1136}\
1137static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1138 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1139 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1140 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1141}\
59fe111e 1142\
669ac79c
MN
1143static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1144{\
1145 int i, a0, b0, a1, b1;\
1146 a0= pixels[0];\
1147 b0= pixels[1] + 2;\
1148 a0 += b0;\
1149 b0 += pixels[2];\
1150\
1151 pixels+=line_size;\
1152 for(i=0; i<h; i+=2){\
1153 a1= pixels[0];\
1154 b1= pixels[1];\
1155 a1 += b1;\
1156 b1 += pixels[2];\
1157\
1158 block[0]= (a1+a0)>>2; /* FIXME non put */\
1159 block[1]= (b1+b0)>>2;\
1160\
1161 pixels+=line_size;\
1162 block +=line_size;\
1163\
1164 a0= pixels[0];\
1165 b0= pixels[1] + 2;\
1166 a0 += b0;\
1167 b0 += pixels[2];\
1168\
1169 block[0]= (a1+a0)>>2;\
1170 block[1]= (b1+b0)>>2;\
1171 pixels+=line_size;\
1172 block +=line_size;\
1173 }\
1174}\
1175\
1176static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1177{\
1178 int i;\
905694d9
RS
1179 const uint32_t a= AV_RN32(pixels );\
1180 const uint32_t b= AV_RN32(pixels+1);\
669ac79c
MN
1181 uint32_t l0= (a&0x03030303UL)\
1182 + (b&0x03030303UL)\
1183 + 0x02020202UL;\
1184 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1185 + ((b&0xFCFCFCFCUL)>>2);\
1186 uint32_t l1,h1;\
1187\
1188 pixels+=line_size;\
1189 for(i=0; i<h; i+=2){\
905694d9
RS
1190 uint32_t a= AV_RN32(pixels );\
1191 uint32_t b= AV_RN32(pixels+1);\
669ac79c
MN
1192 l1= (a&0x03030303UL)\
1193 + (b&0x03030303UL);\
1194 h1= ((a&0xFCFCFCFCUL)>>2)\
1195 + ((b&0xFCFCFCFCUL)>>2);\
1196 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1197 pixels+=line_size;\
1198 block +=line_size;\
905694d9
RS
1199 a= AV_RN32(pixels );\
1200 b= AV_RN32(pixels+1);\
669ac79c
MN
1201 l0= (a&0x03030303UL)\
1202 + (b&0x03030303UL)\
1203 + 0x02020202UL;\
1204 h0= ((a&0xFCFCFCFCUL)>>2)\
1205 + ((b&0xFCFCFCFCUL)>>2);\
1206 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1207 pixels+=line_size;\
1208 block +=line_size;\
1209 }\
1210}\
1211\
45553457 1212static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
1213{\
1214 int j;\
1215 for(j=0; j<2; j++){\
1216 int i;\
905694d9
RS
1217 const uint32_t a= AV_RN32(pixels );\
1218 const uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1219 uint32_t l0= (a&0x03030303UL)\
1220 + (b&0x03030303UL)\
1221 + 0x02020202UL;\
1222 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1223 + ((b&0xFCFCFCFCUL)>>2);\
1224 uint32_t l1,h1;\
1225\
1226 pixels+=line_size;\
1227 for(i=0; i<h; i+=2){\
905694d9
RS
1228 uint32_t a= AV_RN32(pixels );\
1229 uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1230 l1= (a&0x03030303UL)\
1231 + (b&0x03030303UL);\
1232 h1= ((a&0xFCFCFCFCUL)>>2)\
1233 + ((b&0xFCFCFCFCUL)>>2);\
1234 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1235 pixels+=line_size;\
1236 block +=line_size;\
905694d9
RS
1237 a= AV_RN32(pixels );\
1238 b= AV_RN32(pixels+1);\
59fe111e
MN
1239 l0= (a&0x03030303UL)\
1240 + (b&0x03030303UL)\
1241 + 0x02020202UL;\
1242 h0= ((a&0xFCFCFCFCUL)>>2)\
1243 + ((b&0xFCFCFCFCUL)>>2);\
1244 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1245 pixels+=line_size;\
1246 block +=line_size;\
1247 }\
1248 pixels+=4-line_size*(h+1);\
1249 block +=4-line_size*h;\
1250 }\
1251}\
1252\
45553457 1253static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
59fe111e
MN
1254{\
1255 int j;\
1256 for(j=0; j<2; j++){\
1257 int i;\
905694d9
RS
1258 const uint32_t a= AV_RN32(pixels );\
1259 const uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1260 uint32_t l0= (a&0x03030303UL)\
1261 + (b&0x03030303UL)\
1262 + 0x01010101UL;\
1263 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1264 + ((b&0xFCFCFCFCUL)>>2);\
1265 uint32_t l1,h1;\
1266\
1267 pixels+=line_size;\
1268 for(i=0; i<h; i+=2){\
905694d9
RS
1269 uint32_t a= AV_RN32(pixels );\
1270 uint32_t b= AV_RN32(pixels+1);\
59fe111e
MN
1271 l1= (a&0x03030303UL)\
1272 + (b&0x03030303UL);\
1273 h1= ((a&0xFCFCFCFCUL)>>2)\
1274 + ((b&0xFCFCFCFCUL)>>2);\
1275 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1276 pixels+=line_size;\
1277 block +=line_size;\
905694d9
RS
1278 a= AV_RN32(pixels );\
1279 b= AV_RN32(pixels+1);\
59fe111e
MN
1280 l0= (a&0x03030303UL)\
1281 + (b&0x03030303UL)\
1282 + 0x01010101UL;\
1283 h0= ((a&0xFCFCFCFCUL)>>2)\
1284 + ((b&0xFCFCFCFCUL)>>2);\
1285 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1286 pixels+=line_size;\
1287 block +=line_size;\
1288 }\
1289 pixels+=4-line_size*(h+1);\
1290 block +=4-line_size*h;\
1291 }\
1292}\
1293\
45553457
ZK
1294CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
1295CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1296CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1297CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1298CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
1299CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1300CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1301CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
b3184779 1302
d8085ea7 1303#define op_avg(a, b) a = rnd_avg32(a, b)
59fe111e 1304#endif
59fe111e
MN
1305#define op_put(a, b) a = b
1306
1307PIXOP2(avg, op_avg)
1308PIXOP2(put, op_put)
1309#undef op_avg
1310#undef op_put
1311
de6d9b64
FB
/* Rounded average of two / four values.
 * Every argument is parenthesized so that callers may pass arbitrary
 * expressions (e.g. containing &, | or shifts) without operator-precedence
 * surprises. Each argument is still evaluated exactly once. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
1314
c0a0170c
MN
/**
 * Single-stride entry point for put_no_rnd_pixels16_l2(): the same stride is
 * used for the destination and for both sources a and b.
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    const int dst_stride = stride;
    const int a_stride   = stride;
    const int b_stride   = stride;

    put_no_rnd_pixels16_l2(dst, a, b, dst_stride, a_stride, b_stride, h);
}
1318
/**
 * Single-stride entry point for put_no_rnd_pixels8_l2(): the same stride is
 * used for the destination and for both sources a and b.
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    const int dst_stride = stride;
    const int a_stride   = stride;
    const int b_stride   = stride;

    put_no_rnd_pixels8_l2(dst, a, b, dst_stride, a_stride, b_stride, h);
}
073b013d 1322
/**
 * Bilinear interpolation of an 8-pixel-wide block with 1/16 pel weights.
 *
 * Each output pixel blends the source pixel, its right neighbour and the two
 * pixels one row below, with weights derived from x16 / y16 (the four weights
 * sum to 256), adds rounder and shifts right by 8.
 *
 * @param dst     destination, 8 pixels are written per row
 * @param src     source, 9 pixels are read from each of two adjacent rows
 * @param stride  distance between rows, shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand pixels
 * @param y16     vertical weight of the lower row
 * @param rounder value added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_top_left  = (16 - x16) * (16 - y16);
    const int w_top_right = (     x16) * (16 - y16);
    const int w_bot_left  = (16 - x16) * (     y16);
    const int w_bot_right = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (  w_top_left  * src[col]
                        + w_top_right * src[col + 1]
                        + w_bot_left  * src[stride + col]
                        + w_bot_right * src[stride + col + 1]
                        + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1345
/**
 * Warp an 8-pixel-wide, h-row block using per-pixel source coordinates.
 *
 * For every output pixel a source position (vx, vy) is tracked: it starts at
 * (ox, oy), advances by (dxx, dyx) per column and the row start advances by
 * (dxy, dyy) per row. After dropping the low 16 bits, the lowest `shift` bits
 * of each coordinate are the interpolation fraction and the remaining bits
 * the integer sample position.
 *
 * Positions with both coordinates in [0, width-1) x [0, height-1) are
 * interpolated bilinearly. If only one coordinate is in range, the other is
 * clipped to [0, width-1] / [0, height-1] and interpolation happens along the
 * in-range axis only. If neither is in range, the clipped sample is copied.
 *
 * @param r      rounding constant added before the final shift by 2*shift
 * @param width  source width used for the range checks
 * @param height source height used for the range checks
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s         = 1 << shift;
    const int frac_mask = s - 1;
    const int max_x     = width  - 1;
    const int max_y     = height - 1;
    int y;

    for (y = 0; y < h; y++) {
        uint8_t *out = dst + y*stride;
        int vx = ox;
        int vy = oy;
        int x;

        for (x = 0; x < 8; x++) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & frac_mask;
            const int frac_y = src_y & frac_mask;
            int x_inside, y_inside, index;

            src_x >>= shift;
            src_y >>= shift;

            /* the unsigned compare rejects negative positions as well */
            x_inside = (unsigned)src_x < max_x;
            y_inside = (unsigned)src_y < max_y;

            if (x_inside && y_inside) {
                index = src_x + src_y*stride;
                out[x] = (  (  src[index         ]*(s-frac_x)
                             + src[index       +1]*   frac_x )*(s-frac_y)
                          + (  src[index+stride  ]*(s-frac_x)
                             + src[index+stride+1]*   frac_x )*   frac_y
                          + r) >> (shift*2);
            } else if (x_inside) {
                /* vertical position out of range: horizontal blend only */
                index = src_x + av_clip(src_y, 0, max_y)*stride;
                out[x] = (  (  src[index  ]*(s-frac_x)
                             + src[index+1]*   frac_x )*s
                          + r) >> (shift*2);
            } else if (y_inside) {
                /* horizontal position out of range: vertical blend only */
                index = av_clip(src_x, 0, max_x) + src_y*stride;
                out[x] = (  (  src[index       ]*(s-frac_y)
                             + src[index+stride]*   frac_y )*s
                          + r) >> (shift*2);
            } else {
                index = av_clip(src_x, 0, max_x) + av_clip(src_y, 0, max_y)*stride;
                out[x] = src[index];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
669ac79c
MN
1403
/**
 * Full-pel "put": forwards to the fixed-width copy routine matching width.
 * Widths other than 2, 4, 8 and 16 are ignored (nothing is written).
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
1412
/**
 * Write a width x height block in which each pixel is a 2:1 blend of the
 * source pixel and its right neighbour. The weighted sum (plus 1) is scaled
 * by 683/2048, i.e. approximately divided by 3.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
1423
/**
 * Write a width x height block in which each pixel is a 1:2 blend of the
 * source pixel and its right neighbour. The weighted sum (plus 1) is scaled
 * by 683/2048, i.e. approximately divided by 3.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1434
669ac79c
MN
/**
 * Write a width x height block in which each pixel is a 2:1 blend of the
 * source pixel and the pixel directly below it. The weighted sum (plus 1) is
 * scaled by 683/2048, i.e. approximately divided by 3.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1445
669ac79c
MN
/**
 * Write a width x height block from a 2x2 source neighbourhood weighted
 * 4 (current), 3 (right), 3 (below), 2 (below-right). The weighted sum
 * (plus 6) is scaled by 2731/32768, i.e. approximately divided by 12.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 4*src[col]        + 3*src[col+1]
                               + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1456
/**
 * Write a width x height block from a 2x2 source neighbourhood weighted
 * 3 (current), 2 (right), 4 (below), 3 (below-right). The weighted sum
 * (plus 6) is scaled by 2731/32768, i.e. approximately divided by 12.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 3*src[col]        + 2*src[col+1]
                               + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1467
/**
 * Write a width x height block in which each pixel is a 1:2 blend of the
 * source pixel and the pixel directly below it. The weighted sum (plus 1) is
 * scaled by 683/2048, i.e. approximately divided by 3.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2*src[col+stride] + 1;
            dst[col] = (683*weighted) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
1478
/**
 * Write a width x height block from a 2x2 source neighbourhood weighted
 * 3 (current), 4 (right), 2 (below), 3 (below-right). The weighted sum
 * (plus 6) is scaled by 2731/32768, i.e. approximately divided by 12.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 3*src[col]        + 4*src[col+1]
                               + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1489
/**
 * Write a width x height block from a 2x2 source neighbourhood weighted
 * 2 (current), 3 (right), 3 (below), 4 (below-right). The weighted sum
 * (plus 6) is scaled by 2731/32768, i.e. approximately divided by 12.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col]        + 3*src[col+1]
                               + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (2731*weighted) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
da3b9756
MM
1500
/**
 * Full-pel "avg": forwards to the fixed-width averaging routine matching
 * width. Widths other than 2, 4, 8 and 16 are ignored (dst is left untouched).
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1509
/**
 * Average (rounding up) the existing dst block with a prediction that is a
 * 2:1 blend of the source pixel and its right neighbour. The prediction's
 * weighted sum (plus 1) is scaled by 683/2048, i.e. approximately divided by 3.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1520
/**
 * Average (rounding up) the existing dst block with a prediction that is a
 * 1:2 blend of the source pixel and its right neighbour. The prediction's
 * weighted sum (plus 1) is scaled by 683/2048, i.e. approximately divided by 3.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1531
da3b9756
MM
/**
 * Average (rounding up) the existing dst block with a prediction that is a
 * 2:1 blend of the source pixel and the pixel directly below it. The
 * prediction's weighted sum (plus 1) is scaled by 683/2048, i.e.
 * approximately divided by 3.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
115329f1 1542
da3b9756
MM
/**
 * Average (rounding up) the existing dst block with a prediction built from
 * a 2x2 source neighbourhood weighted 4 (current), 3 (right), 3 (below),
 * 2 (below-right). The prediction's weighted sum (plus 6) is scaled by
 * 2731/32768, i.e. approximately divided by 12.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 4*src[col]        + 3*src[col+1]
                               + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            const int pred = (2731*weighted) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1553
/**
 * Average (rounding up) the existing dst block with a prediction built from
 * a 2x2 source neighbourhood weighted 3 (current), 2 (right), 4 (below),
 * 3 (below-right). The prediction's weighted sum (plus 6) is scaled by
 * 2731/32768, i.e. approximately divided by 12.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 3*src[col]        + 2*src[col+1]
                               + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            const int pred = (2731*weighted) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1564
/**
 * Average (rounding up) the existing dst block with a prediction that is a
 * 1:2 blend of the source pixel and the pixel directly below it. The
 * prediction's weighted sum (plus 1) is scaled by 683/2048, i.e.
 * approximately divided by 3.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1575
/**
 * Average (rounding up) the existing dst block with a prediction built from
 * a 2x2 source neighbourhood weighted 3 (current), 4 (right), 2 (below),
 * 3 (below-right). The prediction's weighted sum (plus 6) is scaled by
 * 2731/32768, i.e. approximately divided by 12.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 3*src[col]        + 4*src[col+1]
                               + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            const int pred = (2731*weighted) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1586
/**
 * Average (rounding up) the existing dst block with a prediction built from
 * a 2x2 source neighbourhood weighted 2 (current), 3 (right), 3 (below),
 * 4 (below-right). The prediction's weighted sum (plus 6) is scaled by
 * 2731/32768, i.e. approximately divided by 12.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left, col;

    for (rows_left = height; rows_left > 0; rows_left--) {
        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col]        + 3*src[col+1]
                               + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            const int pred = (2731*weighted) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
669ac79c
MN
#if 0
/* Disabled: generator for fixed-width put_tpel_pixels<width>_mcXY_c()
 * wrappers that forward to the generic put_tpel_pixels_mcXY_c() routines
 * with the width filled in.
 * The forwarding statements are plain calls; a leading "void" would turn
 * them into (invalid) declarations and break the build once enabled. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1618
0da71265
MN
/**
 * Generator for one OPNAME##h264_chroma_mc<W>_c() function.
 *
 * The generated function predicts a W-pixel-wide, h-row block by bilinear
 * interpolation with eighth-sample offsets x and y (both asserted to be in
 * 0..7). The four weights A, B, C, D always sum to 64; OP receives the
 * unscaled weighted sum and is responsible for rounding, scaling and storing.
 *
 * When D is zero at least one of x, y is zero, so only two taps are needed:
 * the current pixel (weight A) and either the pixel below (C != 0) or the
 * pixel to the right (otherwise), with weight E = B + C.
 */
#define H264_CHROMA_MC_W(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(row=0; row<h; row++){\
            for(col=0; col<W; col++){\
                OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(row=0; row<h; row++){\
            for(col=0; col<W; col++){\
                OP(dst[col], (A*src[col] + E*src[step+col]));\
            }\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/** Instantiate the 2-, 4- and 8-pixel-wide chroma MC functions for one OP. */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_W(OPNAME, OP, 2)\
\
H264_CHROMA_MC_W(OPNAME, OP, 4)\
\
H264_CHROMA_MC_W(OPNAME, OP, 8)

/* b is the weighted sum (weights total 64): round, scale by 1/64, then either
 * store it (put) or average it, rounding up, with the existing pixel (avg). */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1727
/**
 * 8-pixel-wide bilinear chroma prediction with eighth-sample offsets x and y
 * (both asserted to be in 0..7), using the "no rounding" bias: 32 - 4 = 28 is
 * added to the weighted sum (weights total 64) before shifting right by 6.
 *
 * @param dst    destination, 8 pixels written per row
 * @param src    source, 9 pixels read from each of two adjacent rows
 * @param stride distance between rows, shared by dst and src
 * @param h      number of rows to produce
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int w_top_left  = (8 - x) * (8 - y);
    const int w_top_right = (    x) * (8 - y);
    const int w_bot_left  = (8 - x) * (    y);
    const int w_bot_right = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (  w_top_left  * src[col]
                        + w_top_right * src[col + 1]
                        + w_bot_left  * src[stride + col]
                        + w_bot_right * src[stride + col + 1]
                        + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
1751
8013da73
DC
/**
 * 8-pixel-wide bilinear chroma prediction with eighth-sample offsets x and y
 * (both asserted to be in 0..7), averaged into the existing destination.
 *
 * The prediction uses the "no rounding" bias (32 - 4 = 28 added to the
 * weighted sum, weights total 64, then shifted right by 6); the result is
 * then averaged with the current dst pixel, rounding up.
 *
 * @param dst    destination, 8 pixels read and rewritten per row
 * @param src    source, 9 pixels read from each of two adjacent rows
 * @param stride distance between rows, shared by dst and src
 * @param h      number of rows to produce
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int w_top_left  = (8 - x) * (8 - y);
    const int w_top_right = (    x) * (8 - y);
    const int w_bot_left  = (8 - x) * (    y);
    const int w_bot_right = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            const int pred = (  w_top_left  * src[col]
                              + w_top_right * src[col + 1]
                              + w_bot_left  * src[stride + col]
                              + w_bot_right * src[stride + col + 1]
                              + 32 - 4) >> 6;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1775
/**
 * QPEL_MC(r, OPNAME, RND, OP): generate one family of MPEG-4 qpel
 * interpolation functions for 8x8 and 16x16 blocks.
 *
 * Macro parameters:
 *   r      - not referenced anywhere in the macro body.
 *   OPNAME - prefix token-pasted onto every generated function name and onto
 *            the pixels8 / pixels16 helpers used for the final write to dst
 *            (OPNAME ## pixels8_c, OPNAME ## pixels8_l2, OPNAME ## pixels8_l4
 *            and the 16 wide equivalents, all defined outside this macro).
 *   RND    - infix pasted between "put" and the helper name for every
 *            intermediate pass that writes into a local temporary
 *            (put ## RND ## mpeg4_qpel8_h_lowpass, put ## RND ## pixels8_l2, ...).
 *   OP     - OP(lvalue, sum), invoked once per output sample with the raw
 *            filter sum. The local `cm` declared in each lowpass function is
 *            not used directly by the macro text; the op_* macros passed as
 *            OP index it.
 *
 * Generated lowpass helpers (all static):
 *   OPNAME mpeg4_qpel8_h_lowpass (dst, src, dstStride, srcStride, h)
 *       for each of h rows reads src[0..8] and produces dst[0..7].
 *   OPNAME mpeg4_qpel8_v_lowpass (dst, src, dstStride, srcStride)
 *       for each of 8 columns reads 9 rows and produces 8 rows.
 *   OPNAME mpeg4_qpel16_h_lowpass / mpeg4_qpel16_v_lowpass
 *       same, reading 17 samples and producing 16.
 *   Every output is (a+b)*20 - (c+d)*6 + (e+f)*3 - (g+h), where (a,b) are the
 *   two samples adjacent to the output position and each following pair is
 *   one sample further out on both sides. The coefficients sum to 32. Tap
 *   indices that would fall outside 0..8 (0..16) are mirrored back inside,
 *   e.g. the first output uses sample 0 in both its first and second pair
 *   and the last output uses sample 8 (16) in both, so the helpers never
 *   read outside the 9 (17) sample window.
 *
 * Generated OPNAME qpelN_mcXY_c(dst, src, stride) functions (static), N = 8, 16.
 * Sizes below are given for N = 8 with the N = 16 value in parentheses:
 *   mc00            OPNAME pixelsN_c with height 8 (16).
 *   mc20            horizontal lowpass straight into dst.
 *   mc10 / mc30     horizontal lowpass into half, then OPNAME pixelsN_l2 of
 *                   src (mc10) or src+1 (mc30) with half.
 *   mc02            copy to full, vertical lowpass straight into dst.
 *   mc01 / mc03     copy to full, vertical lowpass into half, then
 *                   OPNAME pixelsN_l2 of full (mc01) or full+16 (full+24),
 *                   i.e. one row down (mc03), with half.
 *   mc22            horizontal lowpass of 9 (17) rows into halfH, vertical
 *                   lowpass of halfH straight into dst.
 *   mc21 / mc23     as mc22 but the vertical result goes to halfHV and dst is
 *                   OPNAME pixelsN_l2 of halfH (mc21) or halfH+8 (halfH+16),
 *                   i.e. one row down (mc23), with halfHV.
 *   mc12 / mc32     copy to full, horizontal lowpass into halfH, halfH is
 *                   then overwritten by put RND pixelsN_l2 of halfH with
 *                   full (mc12) or full+1 (mc32); vertical lowpass of that
 *                   into dst.
 *   mc11 / mc31 / mc13 / mc33
 *                   same halfH preparation (full for X=1, full+1 for X=3),
 *                   vertical lowpass into halfHV, then OPNAME pixelsN_l2 of
 *                   halfH (Y=1) or halfH one row down (Y=3) with halfHV.
 *
 * Generated ff_ OPNAME qpelN_mc{11,31,13,33,12,32}_old_c functions are not
 * static. They compute halfH, halfV and halfHV separately; the four corner
 * cases combine full, halfH, halfV and halfHV with OPNAME pixelsN_l4, while
 * mc12 / mc32 combine only halfV and halfHV with OPNAME pixelsN_l2.
 *
 * Temporaries: full is 9 rows at stride 16 (17 rows at stride 24) filled by
 * copy_block9 (copy_block17); halfH is 9x8 (17x16) so the following vertical
 * pass has the extra row it reads; half, halfV and halfHV are 8x8 (16x16).
 *
 * NOTE(review): pixelsN_l2 / pixelsN_l4 and copy_block9 / copy_block17 are
 * defined elsewhere; they presumably average two / four sources and copy a
 * 9 / 17 pixel wide block respectively - confirm against their definitions.
 */
b3184779 1776#define QPEL_MC(r, OPNAME, RND, OP) \
0c1a9eda 1777static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 1778 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
1779 int i;\
1780 for(i=0; i<h; i++)\
1781 {\
1782 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1783 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1784 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1785 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1786 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1787 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1788 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1789 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1790 dst+=dstStride;\
1791 src+=srcStride;\
1792 }\
44eb4951
MN
1793}\
1794\
0c1a9eda 1795static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
db794953 1796 const int w=8;\
55fde95e 1797 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779
MN
1798 int i;\
1799 for(i=0; i<w; i++)\
1800 {\
1801 const int src0= src[0*srcStride];\
1802 const int src1= src[1*srcStride];\
1803 const int src2= src[2*srcStride];\
1804 const int src3= src[3*srcStride];\
1805 const int src4= src[4*srcStride];\
1806 const int src5= src[5*srcStride];\
1807 const int src6= src[6*srcStride];\
1808 const int src7= src[7*srcStride];\
1809 const int src8= src[8*srcStride];\
1810 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1811 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1812 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1813 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1814 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1815 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1816 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1817 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1818 dst++;\
1819 src++;\
1820 }\
1821}\
1822\
0c1a9eda 1823static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
55fde95e 1824 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 1825 int i;\
826f429a 1826 \
b3184779
MN
1827 for(i=0; i<h; i++)\
1828 {\
1829 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1830 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1831 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1832 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1833 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1834 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1835 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1836 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1837 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1838 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1839 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1840 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1841 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1842 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1843 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1844 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1845 dst+=dstStride;\
1846 src+=srcStride;\
1847 }\
1848}\
1849\
0c1a9eda 1850static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
55fde95e 1851 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
b3184779 1852 int i;\
826f429a 1853 const int w=16;\
b3184779
MN
1854 for(i=0; i<w; i++)\
1855 {\
1856 const int src0= src[0*srcStride];\
1857 const int src1= src[1*srcStride];\
1858 const int src2= src[2*srcStride];\
1859 const int src3= src[3*srcStride];\
1860 const int src4= src[4*srcStride];\
1861 const int src5= src[5*srcStride];\
1862 const int src6= src[6*srcStride];\
1863 const int src7= src[7*srcStride];\
1864 const int src8= src[8*srcStride];\
1865 const int src9= src[9*srcStride];\
1866 const int src10= src[10*srcStride];\
1867 const int src11= src[11*srcStride];\
1868 const int src12= src[12*srcStride];\
1869 const int src13= src[13*srcStride];\
1870 const int src14= src[14*srcStride];\
1871 const int src15= src[15*srcStride];\
1872 const int src16= src[16*srcStride];\
1873 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1874 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1875 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1876 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1877 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1878 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1879 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1880 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1881 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1882 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1883 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1884 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1885 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1886 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1887 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1888 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1889 dst++;\
1890 src++;\
1891 }\
1892}\
1893\
0c1a9eda 1894static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 1895 OPNAME ## pixels8_c(dst, src, stride, 8);\
b3184779
MN
1896}\
1897\
0c1a9eda
ZK
1898static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1899 uint8_t half[64];\
b3184779
MN
1900 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1901 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
44eb4951
MN
1902}\
1903\
0c1a9eda 1904static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 1905 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
44eb4951
MN
1906}\
1907\
0c1a9eda
ZK
1908static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1909 uint8_t half[64];\
b3184779
MN
1910 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1911 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
44eb4951
MN
1912}\
1913\
0c1a9eda
ZK
1914static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1915 uint8_t full[16*9];\
1916 uint8_t half[64];\
b3184779 1917 copy_block9(full, src, 16, stride, 9);\
db794953 1918 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1919 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
44eb4951
MN
1920}\
1921\
0c1a9eda
ZK
1922static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1923 uint8_t full[16*9];\
b3184779 1924 copy_block9(full, src, 16, stride, 9);\
db794953 1925 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
44eb4951
MN
1926}\
1927\
0c1a9eda
ZK
1928static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1929 uint8_t full[16*9];\
1930 uint8_t half[64];\
b3184779 1931 copy_block9(full, src, 16, stride, 9);\
db794953 1932 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
b3184779 1933 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
44eb4951 1934}\
0c1a9eda
ZK
1935void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1936 uint8_t full[16*9];\
1937 uint8_t halfH[72];\
1938 uint8_t halfV[64];\
1939 uint8_t halfHV[64];\
b3184779
MN
1940 copy_block9(full, src, 16, stride, 9);\
1941 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1942 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1943 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1944 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1945}\
0c1a9eda
ZK
1946static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1947 uint8_t full[16*9];\
1948 uint8_t halfH[72];\
1949 uint8_t halfHV[64];\
db794953
MN
1950 copy_block9(full, src, 16, stride, 9);\
1951 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1952 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1953 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1954 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1955}\
0c1a9eda
ZK
1956void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1957 uint8_t full[16*9];\
1958 uint8_t halfH[72];\
1959 uint8_t halfV[64];\
1960 uint8_t halfHV[64];\
b3184779
MN
1961 copy_block9(full, src, 16, stride, 9);\
1962 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1963 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1964 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1965 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1966}\
0c1a9eda
ZK
1967static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1968 uint8_t full[16*9];\
1969 uint8_t halfH[72];\
1970 uint8_t halfHV[64];\
db794953
MN
1971 copy_block9(full, src, 16, stride, 9);\
1972 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1973 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1974 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1975 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1976}\
0c1a9eda
ZK
1977void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1978 uint8_t full[16*9];\
1979 uint8_t halfH[72];\
1980 uint8_t halfV[64];\
1981 uint8_t halfHV[64];\
b3184779
MN
1982 copy_block9(full, src, 16, stride, 9);\
1983 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
1984 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1985 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 1986 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 1987}\
0c1a9eda
ZK
1988static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1989 uint8_t full[16*9];\
1990 uint8_t halfH[72];\
1991 uint8_t halfHV[64];\
db794953
MN
1992 copy_block9(full, src, 16, stride, 9);\
1993 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1994 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1995 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1996 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1997}\
0c1a9eda
ZK
1998void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1999 uint8_t full[16*9];\
2000 uint8_t halfH[72];\
2001 uint8_t halfV[64];\
2002 uint8_t halfHV[64];\
b3184779
MN
2003 copy_block9(full, src, 16, stride, 9);\
2004 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
db794953
MN
2005 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2006 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2007 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
44eb4951 2008}\
0c1a9eda
ZK
2009static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2010 uint8_t full[16*9];\
2011 uint8_t halfH[72];\
2012 uint8_t halfHV[64];\
db794953
MN
2013 copy_block9(full, src, 16, stride, 9);\
2014 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2015 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2016 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2017 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
2018}\
0c1a9eda
ZK
2019static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2020 uint8_t halfH[72];\
2021 uint8_t halfHV[64];\
b3184779 2022 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 2023 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2024 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
44eb4951 2025}\
0c1a9eda
ZK
2026static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2027 uint8_t halfH[72];\
2028 uint8_t halfHV[64];\
b3184779 2029 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 2030 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2031 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
44eb4951 2032}\
0c1a9eda
ZK
2033void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2034 uint8_t full[16*9];\
2035 uint8_t halfH[72];\
2036 uint8_t halfV[64];\
2037 uint8_t halfHV[64];\
b3184779
MN
2038 copy_block9(full, src, 16, stride, 9);\
2039 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
2040 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
2041 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2042 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 2043}\
0c1a9eda
ZK
2044static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2045 uint8_t full[16*9];\
2046 uint8_t halfH[72];\
db794953
MN
2047 copy_block9(full, src, 16, stride, 9);\
2048 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2049 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
2050 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2051}\
0c1a9eda
ZK
2052void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2053 uint8_t full[16*9];\
2054 uint8_t halfH[72];\
2055 uint8_t halfV[64];\
2056 uint8_t halfHV[64];\
b3184779
MN
2057 copy_block9(full, src, 16, stride, 9);\
2058 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
db794953
MN
2059 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2060 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
b3184779 2061 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
44eb4951 2062}\
0c1a9eda
ZK
2063static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2064 uint8_t full[16*9];\
2065 uint8_t halfH[72];\
db794953
MN
2066 copy_block9(full, src, 16, stride, 9);\
2067 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2068 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2069 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2070}\
0c1a9eda
ZK
2071static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2072 uint8_t halfH[72];\
b3184779 2073 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
db794953 2074 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
b3184779 2075}\
0c1a9eda 2076static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
45553457 2077 OPNAME ## pixels16_c(dst, src, stride, 16);\
b3184779
MN
2078}\
2079\
0c1a9eda
ZK
2080static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2081 uint8_t half[256];\
b3184779
MN
2082 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2083 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
2084}\
2085\
0c1a9eda 2086static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
b3184779 2087 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
44eb4951 2088}\
b3184779 2089\
0c1a9eda
ZK
2090static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2091 uint8_t half[256];\
b3184779
MN
2092 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2093 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
2094}\
2095\
0c1a9eda
ZK
2096static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2097 uint8_t full[24*17];\
2098 uint8_t half[256];\
b3184779 2099 copy_block17(full, src, 24, stride, 17);\
826f429a 2100 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
2101 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
2102}\
2103\
0c1a9eda
ZK
2104static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2105 uint8_t full[24*17];\
b3184779 2106 copy_block17(full, src, 24, stride, 17);\
826f429a 2107 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
b3184779
MN
2108}\
2109\
0c1a9eda
ZK
2110static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2111 uint8_t full[24*17];\
2112 uint8_t half[256];\
b3184779 2113 copy_block17(full, src, 24, stride, 17);\
826f429a 2114 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
b3184779
MN
2115 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
2116}\
0c1a9eda
ZK
2117void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
2118 uint8_t full[24*17];\
2119 uint8_t halfH[272];\
2120 uint8_t halfV[256];\
2121 uint8_t halfHV[256];\
b3184779
MN
2122 copy_block17(full, src, 24, stride, 17);\
2123 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2124 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2125 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2126 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2127}\
0c1a9eda
ZK
2128static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2129 uint8_t full[24*17];\
2130 uint8_t halfH[272];\
2131 uint8_t halfHV[256];\
db794953
MN
2132 copy_block17(full, src, 24, stride, 17);\
2133 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2134 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2136 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2137}\
0c1a9eda
ZK
2138void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
2139 uint8_t full[24*17];\
2140 uint8_t halfH[272];\
2141 uint8_t halfV[256];\
2142 uint8_t halfHV[256];\
b3184779
MN
2143 copy_block17(full, src, 24, stride, 17);\
2144 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2145 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2146 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2147 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2148}\
0c1a9eda
ZK
2149static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2150 uint8_t full[24*17];\
2151 uint8_t halfH[272];\
2152 uint8_t halfHV[256];\
db794953
MN
2153 copy_block17(full, src, 24, stride, 17);\
2154 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2155 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2156 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2157 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2158}\
0c1a9eda
ZK
2159void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
2160 uint8_t full[24*17];\
2161 uint8_t halfH[272];\
2162 uint8_t halfV[256];\
2163 uint8_t halfHV[256];\
b3184779
MN
2164 copy_block17(full, src, 24, stride, 17);\
2165 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2166 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2167 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2168 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2169}\
0c1a9eda
ZK
2170static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2171 uint8_t full[24*17];\
2172 uint8_t halfH[272];\
2173 uint8_t halfHV[256];\
db794953
MN
2174 copy_block17(full, src, 24, stride, 17);\
2175 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2176 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2177 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2178 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2179}\
0c1a9eda
ZK
2180void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2181 uint8_t full[24*17];\
2182 uint8_t halfH[272];\
2183 uint8_t halfV[256];\
2184 uint8_t halfHV[256];\
b3184779
MN
2185 copy_block17(full, src, 24, stride, 17);\
2186 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
826f429a
MN
2187 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2188 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2189 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2190}\
0c1a9eda
ZK
2191static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2192 uint8_t full[24*17];\
2193 uint8_t halfH[272];\
2194 uint8_t halfHV[256];\
db794953
MN
2195 copy_block17(full, src, 24, stride, 17);\
2196 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2197 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2198 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2199 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2200}\
0c1a9eda
ZK
2201static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2202 uint8_t halfH[272];\
2203 uint8_t halfHV[256];\
b3184779 2204 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 2205 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2206 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2207}\
0c1a9eda
ZK
2208static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2209 uint8_t halfH[272];\
2210 uint8_t halfHV[256];\
b3184779 2211 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 2212 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2213 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2214}\
0c1a9eda
ZK
2215void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2216 uint8_t full[24*17];\
2217 uint8_t halfH[272];\
2218 uint8_t halfV[256];\
2219 uint8_t halfHV[256];\
b3184779
MN
2220 copy_block17(full, src, 24, stride, 17);\
2221 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2222 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2223 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2224 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2225}\
0c1a9eda
ZK
2226static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2227 uint8_t full[24*17];\
2228 uint8_t halfH[272];\
db794953
MN
2229 copy_block17(full, src, 24, stride, 17);\
2230 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2231 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2232 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2233}\
0c1a9eda
ZK
2234void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2235 uint8_t full[24*17];\
2236 uint8_t halfH[272];\
2237 uint8_t halfV[256];\
2238 uint8_t halfHV[256];\
b3184779
MN
2239 copy_block17(full, src, 24, stride, 17);\
2240 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
826f429a
MN
2241 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2242 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
b3184779
MN
2243 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2244}\
0c1a9eda
ZK
2245static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2246 uint8_t full[24*17];\
2247 uint8_t halfH[272];\
db794953
MN
2248 copy_block17(full, src, 24, stride, 17);\
2249 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2250 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2251 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2252}\
0c1a9eda
ZK
2253static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2254 uint8_t halfH[272];\
b3184779 2255 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
826f429a 2256 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
45553457 2257}
44eb4951 2258
b3184779
MN
/*
 * Store operators handed to QPEL_MC as its OP argument. `a` is the
 * destination lvalue, `b` the raw lowpass sum. The QPEL_MC filter
 * coefficients sum to 32, so ">>5" brings b back to sample scale; the
 * rounding variants add 16 before the shift, the no_rnd variants add 15.
 * The result indexes `cm`, which every generated lowpass function sets to
 * ff_cropTbl + MAX_NEG_CROP (presumably a clip-to-uint8 table - its contents
 * are initialised elsewhere).
 */
/* Filtered value averaged with the existing destination, rounding up. */
2259#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
/* Filtered value (+15 rounding) averaged with the destination, truncating. */
2260#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
/* Filtered value stored directly. */
2261#define op_put(a, b) a = cm[((b) + 16)>>5]
/* As op_put, but with +15 instead of +16 before the shift. */
2262#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2263
/*
 * Instantiate the qpel function families. The third argument is pasted
 * between "put" and the helper name for intermediate passes, so "_" selects
 * the put_ helpers and "_no_rnd_" the put_no_rnd_ helpers. Both put_ and
 * avg_ pass "_" and therefore share the put_ intermediates. The
 * avg_no_rnd instantiation below is disabled, which leaves op_avg_no_rnd
 * without a user among these instantiations.
 */
2264QPEL_MC(0, put_ , _ , op_put)
2265QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2266QPEL_MC(0, avg_ , _ , op_avg)
2267//QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* Drop the operator names so that later code can define its own op_* macros. */
2268#undef op_avg
2269#undef op_avg_no_rnd
2270#undef op_put
2271#undef op_put_no_rnd
44eb4951 2272
0da71265
MN
2273#if 1
2274#define H264_LOWPASS(OPNAME, OP, OP2) \
bb5705b9 2275static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
80e44bc3 2276 const int h=2;\
55fde95e 2277 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2278 int i;\
2279 for(i=0; i<h; i++)\
2280 {\
2281 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2282 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2283 dst+=dstStride;\
2284 src+=srcStride;\
2285 }\
2286}\
2287\
bb5705b9 2288static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
80e44bc3 2289 const int w=2;\
55fde95e 2290 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2291 int i;\
2292 for(i=0; i<w; i++)\
2293 {\
2294 const int srcB= src[-2*srcStride];\
2295 const int srcA= src[-1*srcStride];\
2296 const int src0= src[0 *srcStride];\
2297 const int src1= src[1 *srcStride];\
2298 const int src2= src[2 *srcStride];\
2299 const int src3= src[3 *srcStride];\
2300 const int src4= src[4 *srcStride];\
2301 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2302 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2303 dst++;\
2304 src++;\
2305 }\
2306}\
2307\
bb5705b9 2308static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
80e44bc3
MN
2309 const int h=2;\
2310 const int w=2;\
55fde95e 2311 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
80e44bc3
MN
2312 int i;\
2313 src -= 2*srcStride;\
2314 for(i=0; i<h+5; i++)\
2315 {\
2316 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2317 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2318 tmp+=tmpStride;\
2319 src+=srcStride;\
2320 }\
2321 tmp -= tmpStride*(h+5-2);\
2322 for(i=0; i<w; i++)\
2323 {\
2324 const int tmpB= tmp[-2*tmpStride];\
2325 const int tmpA= tmp[-1*tmpStride];\
2326 const int tmp0= tmp[0 *tmpStride];\
2327 const int tmp1= tmp[1 *tmpStride];\
2328 const int tmp2= tmp[2 *tmpStride];\
2329 const int tmp3= tmp[3 *tmpStride];\
2330 const int tmp4= tmp[4 *tmpStride];\
2331 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2332 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2333 dst++;\
2334 tmp++;\
2335 }\
2336}\
0da71265
MN
2337static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2338 const int h=4;\
55fde95e 2339 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2340 int i;\
2341 for(i=0; i<h; i++)\
2342 {\
2343 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2344 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2345 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
2346 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
2347 dst+=dstStride;\
2348 src+=srcStride;\
2349 }\
2350}\
2351\
2352static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2353 const int w=4;\
55fde95e 2354 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2355 int i;\
2356 for(i=0; i<w; i++)\
2357 {\
2358 const int srcB= src[-2*srcStride];\
2359 const int srcA= src[-1*srcStride];\
2360 const int src0= src[0 *srcStride];\
2361 const int src1= src[1 *srcStride];\
2362 const int src2= src[2 *srcStride];\
2363 const int src3= src[3 *srcStride];\
2364 const int src4= src[4 *srcStride];\
2365 const int src5= src[5 *srcStride];\
2366 const int src6= src[6 *srcStride];\
2367 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2368 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2369 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2370 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2371 dst++;\
2372 src++;\
2373 }\
2374}\
2375\
2376static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2377 const int h=4;\
2378 const int w=4;\
55fde95e 2379 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2380 int i;\
2381 src -= 2*srcStride;\
2382 for(i=0; i<h+5; i++)\
2383 {\
2384 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2385 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2386 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
2387 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
2388 tmp+=tmpStride;\
2389 src+=srcStride;\
2390 }\
2391 tmp -= tmpStride*(h+5-2);\
2392 for(i=0; i<w; i++)\
2393 {\
2394 const int tmpB= tmp[-2*tmpStride];\
2395 const int tmpA= tmp[-1*tmpStride];\
2396 const int tmp0= tmp[0 *tmpStride];\
2397 const int tmp1= tmp[1 *tmpStride];\
2398 const int tmp2= tmp[2 *tmpStride];\
2399 const int tmp3= tmp[3 *tmpStride];\
2400 const int tmp4= tmp[4 *tmpStride];\
2401 const int tmp5= tmp[5 *tmpStride];\
2402 const int tmp6= tmp[6 *tmpStride];\
2403 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2404 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2405 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2406 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2407 dst++;\
2408 tmp++;\
2409 }\
2410}\
2411\
2412static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2413 const int h=8;\
55fde95e 2414 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2415 int i;\
2416 for(i=0; i<h; i++)\
2417 {\
2418 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
2419 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
2420 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
2421 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
2422 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
2423 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
2424 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
2425 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
2426 dst+=dstStride;\
2427 src+=srcStride;\
2428 }\
2429}\
2430\
2431static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2432 const int w=8;\
55fde95e 2433 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2434 int i;\
2435 for(i=0; i<w; i++)\
2436 {\
2437 const int srcB= src[-2*srcStride];\
2438 const int srcA= src[-1*srcStride];\
2439 const int src0= src[0 *srcStride];\
2440 const int src1= src[1 *srcStride];\
2441 const int src2= src[2 *srcStride];\
2442 const int src3= src[3 *srcStride];\
2443 const int src4= src[4 *srcStride];\
2444 const int src5= src[5 *srcStride];\
2445 const int src6= src[6 *srcStride];\
2446 const int src7= src[7 *srcStride];\
2447 const int src8= src[8 *srcStride];\
2448 const int src9= src[9 *srcStride];\
2449 const int src10=src[10*srcStride];\
2450 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2451 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2452 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2453 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2454 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
2455 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
2456 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
2457 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
2458 dst++;\
2459 src++;\
2460 }\
2461}\
2462\
2463static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2464 const int h=8;\
2465 const int w=8;\
55fde95e 2466 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
0da71265
MN
2467 int i;\
2468 src -= 2*srcStride;\
2469 for(i=0; i<h+5; i++)\
2470 {\
2471 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
2472 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
2473 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
2474 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
2475 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
2476 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
2477 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
2478 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
2479 tmp+=tmpStride;\
2480 src+=srcStride;\
2481 }\
2482 tmp -= tmpStride*(h+5-2);\
2483 for(i=0; i<w; i++)\
2484 {\
2485 const int tmpB= tmp[-2*tmpStride];\
2486 const int tmpA= tmp[-1*tmpStride];\
2487 const int tmp0= tmp[0 *tmpStride];\
2488 const int tmp1= tmp[1 *tmpStride];\
2489 const int tmp2= tmp[2 *tmpStride];\
2490 const int tmp3= tmp[3 *tmpStride];\
2491 const int tmp4= tmp[4 *tmpStride];\
2492 const int tmp5= tmp[5 *tmpStride];\
2493 const int tmp6= tmp[6 *tmpStride];\
2494 const int tmp7= tmp[7 *tmpStride];\
2495 const int tmp8= tmp[8 *tmpStride];\
2496 const int tmp9= tmp[9 *tmpStride];\
2497 const int tmp10=tmp[10*tmpStride];\
2498 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2499 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2500 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2501 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2502 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
2503 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
2504 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
2505 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
2506 dst++;\
2507 tmp++;\
2508 }\
2509}\
2510\
2511static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2512 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2513 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2514 src += 8*srcStride;\
2515 dst += 8*dstStride;\
2516 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2517 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2518}\
2519\
2520static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2521 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2522 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2523 src += 8*srcStride;\
2524 dst += 8*dstStride;\
2525 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2526 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2527}\
2528\
2529static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2530 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2531 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2532 src += 8*srcStride;\
0da71265
MN
2533 dst += 8*dstStride;\
2534 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2535 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2536}\
2537
/**
 * Emit the sixteen OPNAME##h264_qpel##SIZE##_mcXY_c motion compensation
 * entry points for one block size.
 *
 * Judging by the helpers each variant calls, the X digit selects the
 * horizontal and the Y digit the vertical sub-sample position. Positions
 * that are not produced directly by one low-pass helper are formed by
 * handing two SIZE-wide planes to OPNAME##pixels##SIZE##_l2.
 *
 * Scratch planes always use a row pitch of SIZE. "rows" holds SIZE+5 source
 * rows copied starting two rows above src, and rows_mid points at the row
 * that corresponds to src itself.
 */
#define H264_MC(OPNAME, SIZE) \
/* (0,0): integer position, no filtering */\
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
/* (1,0): source combined with the horizontally filtered plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, hfilt, stride, stride, SIZE, SIZE);\
}\
\
/* (2,0): horizontally filtered plane written straight to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
/* (3,0): like (1,0) but paired with the source one sample to the right */\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, hfilt, stride, stride, SIZE, SIZE);\
}\
\
/* (0,1): copied source rows combined with the vertically filtered plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    uint8_t vfilt[SIZE*SIZE];\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, rows_mid, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (0,2): vertically filtered plane written straight to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, rows_mid, stride, SIZE);\
}\
\
/* (0,3): like (0,1) but paired with the source one row further down */\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    uint8_t vfilt[SIZE*SIZE];\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, rows_mid+SIZE, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (1,1): horizontal plane at src combined with vertical plane at src */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (3,1): horizontal plane at src, vertical plane one column to the right */\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (1,3): horizontal plane one row down, vertical plane at src */\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (3,3): horizontal plane one row down, vertical plane one column right */\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (2,2): two-dimensional low-pass written straight to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t inter[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, inter, src, stride, SIZE, stride);\
}\
\
/* (2,1): horizontal plane at src combined with the two-dimensional plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t inter[SIZE*(SIZE+5)];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, inter, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (2,3): horizontal plane one row down combined with the 2-D plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t inter[SIZE*(SIZE+5)];\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, inter, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (1,2): vertical plane at src combined with the two-dimensional plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    int16_t inter[SIZE*(SIZE+5)];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, inter, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
/* (3,2): vertical plane one column right combined with the 2-D plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const rows_mid= rows + SIZE*2;\
    int16_t inter[SIZE*(SIZE+5)];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, rows_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, inter, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}
2674
2675#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2676//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2677#define op_put(a, b) a = cm[((b) + 16)>>5]
2678#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2679#define op2_put(a, b) a = cm[((b) + 512)>>10]
2680
2681H264_LOWPASS(put_ , op_put, op2_put)
2682H264_LOWPASS(avg_ , op_avg, op2_avg)
80e44bc3 2683H264_MC(put_, 2)
0da71265
MN
2684H264_MC(put_, 4)
2685H264_MC(put_, 8)
2686H264_MC(put_, 16)
2687H264_MC(avg_, 4)
2688H264_MC(avg_, 8)
2689H264_MC(avg_, 16)
2690
2691#undef op_avg
2692#undef op_put
2693#undef op2_avg
2694#undef op2_put
2695#endif
2696
f66e4f5f
RD
/*
 * Per-sample operators used by H264_WEIGHT.
 *   op_scale1: scale block[x] by "weight", add the pre-scaled rounding
 *              offset, shift down by log2_denom and clip to 8 bits.
 *   op_scale2: blend src[x] and dst[x] with their respective weights, add
 *              the offset, shift down by log2_denom+1 and clip to 8 bits.
 */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/**
 * Generate weight_h264_pixelsWxH_c() and biweight_h264_pixelsWxH_c() for one
 * block size. W and H are compile-time constants, so the column loop has a
 * constant trip count.
 *
 * The offset is scaled by multiplication rather than by "<<": it may be
 * negative, and left-shifting a negative int is undefined behaviour in C.
 */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int x, y; \
    offset *= 1 << log2_denom; \
    /* add half of the divisor so that the final shift rounds to nearest */ \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        for(x=0; x<W; x++){ \
            op_scale1(x); \
        } \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int x, y; \
    offset = ((offset + 1) | 1) * (1 << log2_denom); \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        for(x=0; x<W; x++){ \
            op_scale2(x); \
        } \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
2766
1457ab52 2767static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
55fde95e 2768 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
2769 int i;
2770
2771 for(i=0; i<h; i++){
2772 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2773 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2774 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2775 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2776 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2777 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2778 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2779 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2780 dst+=dstStride;
115329f1 2781 src+=srcStride;
1457ab52
MN
2782 }
2783}
2784
#if CONFIG_CAVS_DECODER
/* AVS specific: full-sample (mc00) positions map onto the generic pixel copy/average routines. */

/** 8x8 full-sample copy. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}

/** 8x8 full-sample average into dst. */
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_c(dst, src, stride, 8);
}

/** 16x16 full-sample copy. */
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_c(dst, src, stride, 16);
}

/** 16x16 full-sample average into dst. */
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
b482e2d1 2800
#if CONFIG_VC1_DECODER
/* VC-1 specific: the mc00 position is a plain 8x8 copy/average; rnd is not used here. */

/** 8x8 full-sample copy. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    put_pixels8_c(dst, src, stride, 8);
}

/** 8x8 full-sample average into dst. */
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    avg_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER */
64db55ae 2810
c6b237da 2811/* H264 specific */
edecaff8 2812void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
c6b237da 2813
#if CONFIG_RV40_DECODER
/* RV40: the mc33 position is served by the generic xy2 half-sample routines. */

/** 16x16 put via the xy2 interpolator. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_c(dst, src, stride, 16);
}

/** 16x16 average via the xy2 interpolator. */
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_c(dst, src, stride, 16);
}

/** 8x8 put via the xy2 interpolator. */
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_c(dst, src, stride, 8);
}

/** 8x8 average via the xy2 interpolator. */
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
2828
1457ab52 2829static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
55fde95e 2830 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1457ab52
MN
2831 int i;
2832
2833 for(i=0; i<w; i++){
2834 const int src_1= src[ -srcStride];
2835 const int src0 = src[0 ];
2836 const int src1 = src[ srcStride];
2837 const int src2 = src[2*srcStride];
2838 const int src3 = src[3*srcStride];
2839 const int src4 = src[4*srcStride];
2840 const int src5 = src[5*srcStride];
2841 const int src6 = src[6*srcStride];
2842 const int src7 = src[7*srcStride];
2843 const int src8 = src[8*srcStride];
2844 const int src9 = src[9*srcStride];
2845 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2846 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2847 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2848 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2849 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2850 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2851 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2852 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2853 src++;
2854 dst++;
2855 }
2856}
2857
/** mspel position (0,0): plain 8x8 copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
2861
/** mspel position (1,0): source combined with the horizontally filtered block. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hfilt[64]; /* 8x8 scratch plane, row pitch 8 */

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2(dst, src, hfilt, stride, stride, 8, 8);
}
2867
/** mspel position (2,0): horizontally filtered block written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2871
/** mspel position (3,0): source shifted one sample right combined with the horizontally filtered block. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hfilt[64]; /* 8x8 scratch plane, row pitch 8 */

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, hfilt, stride, stride, 8, 8);
}
2877
/** mspel position (0,2): vertically filtered block written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2881
/**
 * mspel position (1,2): vertical filter of the source combined with the
 * vertical filter of the horizontally filtered rows.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hrows[88];  /* 11 horizontally filtered rows, starting one row above src */
    uint8_t vfilt[64];
    uint8_t hvfilt[64];

    wmv2_mspel8_h_lowpass(hrows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vfilt, src, 8, stride, 8);
    /* hrows+8 is the filtered row that lines up with src */
    wmv2_mspel8_v_lowpass(hvfilt, hrows+8, 8, 8, 8);
    put_pixels8_l2(dst, vfilt, hvfilt, stride, 8, 8, 8);
}
/**
 * mspel position (3,2): like (1,2), but the plain vertical filter runs on
 * the source shifted one sample to the right.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hrows[88];  /* 11 horizontally filtered rows, starting one row above src */
    uint8_t vfilt[64];
    uint8_t hvfilt[64];

    wmv2_mspel8_h_lowpass(hrows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vfilt, src+1, 8, stride, 8);
    /* hrows+8 is the filtered row that lines up with src */
    wmv2_mspel8_v_lowpass(hvfilt, hrows+8, 8, 8, 8);
    put_pixels8_l2(dst, vfilt, hvfilt, stride, 8, 8, 8);
}
/** mspel position (2,2): horizontal filter followed by vertical filter, written straight to dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hrows[88];  /* 11 horizontally filtered rows, starting one row above src */

    wmv2_mspel8_h_lowpass(hrows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hrows+8, stride, 8, 8);
}
2905
332f9ac4 2906static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 2907 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
2908 int x;
2909 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 2910
332f9ac4
MN
2911 for(x=0; x<8; x++){
2912 int d1, d2, ad1;
2913 int p0= src[x-2*stride];
2914 int p1= src[x-1*stride];
2915 int p2= src[x+0*stride];
2916 int p3= src[x+1*stride];
2917 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2918
2919 if (d<-2*strength) d1= 0;
2920 else if(d<- strength) d1=-2*strength - d;
2921 else if(d< strength) d1= d;
2922 else if(d< 2*strength) d1= 2*strength - d;
2923 else d1= 0;
115329f1 2924
332f9ac4
MN
2925 p1 += d1;
2926 p2 -= d1;
2927 if(p1&256) p1= ~(p1>>31);
2928 if(p2&256) p2= ~(p2>>31);
115329f1 2929
332f9ac4
MN
2930 src[x-1*stride] = p1;
2931 src[x+0*stride] = p2;
2932
c26abfa5 2933 ad1= FFABS(d1)>>1;
115329f1 2934
f66e4f5f 2935 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 2936
332f9ac4
MN
2937 src[x-2*stride] = p0 - d2;
2938 src[x+ stride] = p3 + d2;
2939 }
73f51a4d 2940 }
332f9ac4
MN
2941}
2942
2943static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
4052cbf1 2944 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
332f9ac4
MN
2945 int y;
2946 const int strength= ff_h263_loop_filter_strength[qscale];
115329f1 2947
332f9ac4
MN
2948 for(y=0; y<8; y++){
2949 int d1, d2, ad1;
2950 int p0= src[y*stride-2];
2951 int p1= src[y*stride-1];
2952 int p2= src[y*stride+0];
2953 int p3= src[y*stride+1];
2954 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2955
2956 if (d<-2*strength) d1= 0;
2957 else if(d<- strength) d1=-2*strength - d;
2958 else if(d< strength) d1= d;
2959 else if(d< 2*strength) d1= 2*strength - d;
2960 else d1= 0;
115329f1 2961
332f9ac4
MN
2962 p1 += d1;
2963 p2 -= d1;
2964 if(p1&256) p1= ~(p1>>31);
2965 if(p2&256) p2= ~(p2>>31);
115329f1 2966
332f9ac4
MN
2967 src[y*stride-1] = p1;
2968 src[y*stride+0] = p2;
2969
c26abfa5 2970 ad1= FFABS(d1)>>1;
115329f1 2971
f66e4f5f 2972 d2= av_clip((p0-p3)/4, -ad1, ad1);
115329f1 2973
332f9ac4
MN
2974 src[y*stride-2] = p0 - d2;
2975 src[y*stride+1] = p3 + d2;
2976 }
73f51a4d 2977 }
332f9ac4 2978}
1457ab52 2979
fdbbf2e0
MN
/**
 * H.261 in-loop filter on one 8x8 block, done in place in two passes.
 *
 * Pass 1 (vertical) fills a scratch plane: rows 0 and 7 are the source
 * scaled by 4, the rows in between use the (1,2,1) kernel.
 * Pass 2 (horizontal) writes back: columns 0 and 7 only undo the scale by 4
 * with rounding, the columns in between apply (1,2,1) and divide by 16 with
 * rounding.
 *
 * @param src    top-left sample of the block
 * @param stride distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int row, col;
    int vfilt[64];

    /* vertical pass: src -> vfilt */
    for(row=0; row<8; row++){
        const uint8_t *line = src + row*stride;
        int *out = vfilt + row*8;
        const int is_edge_row = (row == 0 || row == 7);

        for(col=0; col<8; col++){
            if(is_edge_row)
                out[col] = 4*line[col];
            else
                out[col] = line[col - stride] + 2*line[col] + line[col + stride];
        }
    }

    /* horizontal pass: vfilt -> src */
    for(row=0; row<8; row++){
        uint8_t *line = src + row*stride;
        const int *in = vfilt + row*8;

        line[0] = (in[0] + 2)>>2;
        line[7] = (in[7] + 2)>>2;
        for(col=1; col<7; col++)
            line[col] = (in[col-1] + 2*in[col] + in[col+1] + 8)>>4;
    }
}
3006
3f50965b 3007static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
42251a2a
LM
3008{
3009 int i, d;
3010 for( i = 0; i < 4; i++ ) {
3011 if( tc0[i] < 0 ) {
3012 pix += 4*ystride;
3013 continue;
3014 }
3015 for( d = 0; d < 4; d++ ) {
3016 const int p0 = pix[-1*xstride];
3017 const int p1 = pix[-2*xstride];
3018 const int p2 = pix[-3*xstride];
3019 const int q0 = pix[0];
3020 const int q1 = pix[1*xstride];
3021 const int q2 = pix[2*xstride];
115329f1 3022
c26abfa5
DB
3023 if( FFABS( p0 - q0 ) < alpha &&
3024 FFABS( p1 - p0 ) < beta &&
3025 FFABS( q1 - q0 ) < beta ) {
115329f1 3026
42251a2a
LM
3027 int tc = tc0[i];
3028 int i_delta;
115329f1 3029
c26abfa5 3030 if( FFABS( p2 - p0 ) < beta ) {
c9640c17 3031 if(tc0[i])
f66e4f5f 3032 pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
42251a2a
LM
3033 tc++;
3034 }
c26abfa5 3035 if( FFABS( q2 - q0 ) < beta ) {
c9640c17 3036 if(tc0[i])
f66e4f5f 3037 pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
42251a2a
LM
3038 tc++;
3039 }
115329f1 3040
f66e4f5f
RD
3041 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
3042 pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */
3043 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
42251a2a
LM
3044 }
3045 pix += ystride;
3046 }
3047 }
3048}
/** Luma deblocking of a horizontal edge: cross the edge by rows, walk along it by columns. */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride, along = 1;
    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}

/** Luma deblocking of a vertical edge: cross the edge by columns, walk along it by rows. */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1, along = stride;
    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}
3057
3f50965b 3058static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
712ca84c
JGG
3059{
3060 int d;
3061 for( d = 0; d < 16; d++ ) {
3062 const int p2 = pix[-3*xstride];
3063 const int p1 = pix[-2*xstride];
3064 const int p0 = pix[-1*xstride];
3065
3066 const int q0 = pix[ 0*xstride];
3067 const int q1 = pix[ 1*xstride];
3068 const int q2 = pix[ 2*xstride];
3069
3070 if( FFABS( p0 - q0 ) < alpha &&
3071 FFABS( p1 - p0 ) < beta &&
3072 FFABS( q1 - q0 ) < beta ) {
3073
3074 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
3075 if( FFABS( p2 - p0 ) < beta)
3076 {
3077 const int p3 = pix[-4*xstride];
3078 /* p0', p1', p2' */
3079 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
3080 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
3081 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
3082 } else {
3083 /* p0' */
3084 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
3085 }
3086 if( FFABS( q2 - q0 ) < beta)
3087 {
3088 const int q3 = pix[3*xstride];
3089 /* q0', q1', q2' */
3090 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
3091 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
3092 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
3093 } else {
3094 /* q0' */
3095 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
3096 }
3097 }else{
3098 /* p0', q0' */
3099 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
3100 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
3101 }
3102 }
3103 pix += ystride;
3104 }
3105}
/** Intra luma deblocking of a horizontal edge: cross the edge by rows, walk along it by columns. */
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = stride, along = 1;
    h264_loop_filter_luma_intra_c(pix, across, along, alpha, beta);
}

/** Intra luma deblocking of a vertical edge: cross the edge by columns, walk along it by rows. */
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = 1, along = stride;
    h264_loop_filter_luma_intra_c(pix, across, along, alpha, beta);
}
3114
3f50965b 3115static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
42251a2a
LM
3116{
3117 int i, d;
3118 for( i = 0; i < 4; i++ ) {
3119 const int tc = tc0[i];
3120 if( tc <= 0 ) {
3121 pix += 2*ystride;
3122 continue;
3123 }
3124 for( d = 0; d < 2; d++ ) {
3125 const int p0 = pix[-1*xstride];
3126 const int p1 = pix[-2*xstride];
3127 const int q0 = pix[0];
3128 const int q1 = pix[1*xstride];
3129
c26abfa5
DB
3130 if( FFABS( p0 - q0 ) < alpha &&
3131 FFABS( p1 - p0 ) < beta &&
3132 FFABS( q1 - q0 ) < beta ) {
42251a2a 3133
f66e4f5f 3134 int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
42251a2a 3135
f66e4f5f
RD
3136 pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */
3137 pix[0] = av_clip_uint8( q0 - delta ); /* q0' */
42251a2a
LM
3138 }
3139 pix += ystride;
3140 }
3141 }
3142}
/** Chroma deblocking of a horizontal edge: cross the edge by rows, walk along it by columns. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride, along = 1;
    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}

/** Chroma deblocking of a vertical edge: cross the edge by columns, walk along it by rows. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1, along = stride;
    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}
3151
3f50965b 3152static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
5cf08f23
LM
3153{
3154 int d;
3155 for( d = 0; d < 8; d++ ) {
3156 const int p0 = pix[-1*xstride];
3157 const int p1 = pix[-2*xstride];
3158 const int q0 = pix[0];
3159 const int q1 = pix[1*xstride];
3160
c26abfa5
DB
3161 if( FFABS( p0 - q0 ) < alpha &&
3162 FFABS( p1 - p0 ) < beta &&
3163 FFABS( q1 - q0 ) < beta ) {
5cf08f23
LM
3164
3165 pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
3166 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
3167 }
3168 pix += ystride;
3169 }
3170}
/** Intra chroma deblocking of a horizontal edge: cross the edge by rows, walk along it by columns. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = stride, along = 1;
    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}

/** Intra chroma deblocking of a vertical edge: cross the edge by columns, walk along it by rows. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = 1, along = stride;
    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}
3179
/**
 * Sum of absolute differences between two 16-sample wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated |pix1 - pix2| over 16 x h samples
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++) {
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
3207
/**
 * Sum of absolute differences between a 16-wide block and the reference
 * block formed by avg2() of each pix2 sample with its right neighbour.
 * Reads 17 samples per row of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++) {
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
3235
/**
 * Sum of absolute differences between a 16-wide block and the reference
 * block formed by avg2() of each pix2 sample with the one directly below.
 * Reads h+1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++) {
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
3265
/**
 * Sum of absolute differences between a 16-wide block and the reference
 * block formed by avg4() over each 2x2 neighbourhood of pix2.
 * Reads 17 samples per row and h+1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated absolute difference
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, x;

    for(row = 0; row < h; row++) {
        for(x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
3295
bb198e19 3296static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
3297{
3298 int s, i;
3299
3300 s = 0;
bb198e19 3301 for(i=0;i<h;i++) {
ba6802de
MN
3302 s += abs(pix1[0] - pix2[0]);
3303 s += abs(pix1[1] - pix2[1]);
3304 s += abs(pix1[2] - pix2[2]);
3305 s += abs(pix1[3] - pix2[3]);
3306 s += abs(pix1[4] - pix2[4]);
3307 s += abs(pix1[5] - pix2[5]);
3308 s += abs(pix1[6] - pix2[6]);
3309 s += abs(pix1[7] - pix2[7]);
3310 pix1 += line_size;
3311 pix2 += line_size;
3312 }
3313 return s;
3314}
3315
bb198e19 3316static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
3317{
3318 int s, i;
3319
3320 s = 0;
bb198e19 3321 for(i=0;i<h;i++) {
ba6802de
MN
3322 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
3323 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
3324 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
3325 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
3326 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
3327 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
3328 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
3329 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
3330 pix1 += line_size;
3331 pix2 += line_size;
3332 }
3333 return s;
3334}
3335
bb198e19 3336static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
3337{
3338 int s, i;
0c1a9eda 3339 uint8_t *pix3 = pix2 + line_size;
ba6802de
MN
3340
3341 s = 0;
bb198e19 3342 for(i=0;i<h;i++) {
ba6802de
MN
3343 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
3344 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
3345 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
3346 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
3347 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
3348 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
3349 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
3350 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
3351 pix1 += line_size;
3352 pix2 += line_size;
3353 pix3 += line_size;
3354 }
3355 return s;
3356}
3357
bb198e19 3358static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
ba6802de
MN
3359{
3360 int s, i;
0c1a9eda 3361 uint8_t *pix3 = pix2 + line_size;
ba6802de
MN
3362
3363 s = 0;
bb198e19 3364 for(i=0;i<h;i++) {
ba6802de
MN
3365 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
3366 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
3367 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
3368 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
3369 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
3370 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
3371 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
3372 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
3373 pix1 += line_size;
3374 pix2 += line_size;
3375 pix3 += line_size;
3376 }
3377 return s;
3378}
3379
bf4e3bd2
MR
3380static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3381 MpegEncContext *c = v;
e6a2ac34
MN
3382 int score1=0;
3383 int score2=0;
3384 int x,y;
d4c5d2ad 3385
e6a2ac34
MN
3386 for(y=0; y<h; y++){
3387 for(x=0; x<16; x++){
3388 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3389 }
3390 if(y+1<h){
3391 for(x=0; x<15; x++){
c26abfa5 3392 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 3393 - s1[x+1] + s1[x+1+stride])
c26abfa5 3394 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
3395 - s2[x+1] + s2[x+1+stride]);
3396 }
3397 }
3398 s1+= stride;
3399 s2+= stride;
3400 }
d4c5d2ad 3401
c26abfa5
DB
3402 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3403 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
3404}
3405
bf4e3bd2
MR
3406static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3407 MpegEncContext *c = v;
e6a2ac34
MN
3408 int score1=0;
3409 int score2=0;
3410 int x,y;
115329f1 3411
e6a2ac34
MN
3412 for(y=0; y<h; y++){
3413 for(x=0; x<8; x++){
3414 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3415 }
3416 if(y+1<h){
3417 for(x=0; x<7; x++){
c26abfa5 3418 score2+= FFABS( s1[x ] - s1[x +stride]
e6a2ac34 3419 - s1[x+1] + s1[x+1+stride])
c26abfa5 3420 -FFABS( s2[x ] - s2[x +stride]
e6a2ac34
MN
3421 - s2[x+1] + s2[x+1+stride]);
3422 }
3423 }
3424 s1+= stride;
3425 s2+= stride;
3426 }
115329f1 3427
c26abfa5
DB
3428 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
3429 else return score1 + FFABS(score2)*8;
e6a2ac34
MN
3430}
3431
364a1797
MN
3432static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
3433 int i;
3434 unsigned int sum=0;
3435
3436 for(i=0; i<8*8; i++){
3437 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
3438 int w= weight[i];
3439 b>>= RECON_SHIFT;
3440 assert(-512<b && b<512);
3441
3442 sum += (w*b)*(w*b)>>4;
3443 }
3444 return sum>>2;
3445}
3446
3447static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
3448 int i;
3449
3450 for(i=0; i<8*8; i++){
3451 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115329f1 3452 }
364a1797
MN
3453}
3454
a9badb51
MN
3455/**
3456 * permutes an 8x8 block.
2a5700de 3457 * @param block the block which will be permuted according to the given permutation vector
a9badb51
MN
3458 * @param permutation the permutation vector
3459 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
115329f1 3460 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
2a5700de 3461 * (inverse) permutated to scantable order!
a9badb51 3462 */
0c1a9eda 3463void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
d962f6fd 3464{
7801d21d 3465 int i;
477ab036 3466 DCTELEM temp[64];
115329f1 3467
7801d21d 3468 if(last<=0) return;
90b5b51e 3469 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
d962f6fd 3470
7801d21d
MN
3471 for(i=0; i<=last; i++){
3472 const int j= scantable[i];
3473 temp[j]= block[j];
3474 block[j]=0;
3475 }
115329f1 3476
7801d21d
MN
3477 for(i=0; i<=last; i++){
3478 const int j= scantable[i];
3479 const int perm_j= permutation[j];
3480 block[perm_j]= temp[j];
3481 }
d962f6fd 3482}
e0eac44e 3483
622348f9
MN
/**
 * Comparison function that ignores all of its arguments and always
 * returns a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
3487
3488void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
3489 int i;
115329f1 3490
3899eb2f 3491 memset(cmp, 0, sizeof(void*)*6);
115329f1 3492
3899eb2f 3493 for(i=0; i<6; i++){
622348f9
MN
3494 switch(type&0xFF){
3495 case FF_CMP_SAD:
3496 cmp[i]= c->sad[i];
3497 break;
3498 case FF_CMP_SATD:
3499 cmp[i]= c->hadamard8_diff[i];
3500 break;
3501 case FF_CMP_SSE:
3502 cmp[i]= c->sse[i];
3503 break;
3504 case FF_CMP_DCT:
3505 cmp[i]= c->dct_sad[i];
3506 break;
27c61ac5
MN
3507 case FF_CMP_DCT264:
3508 cmp[i]= c->dct264_sad[i];
3509 break;
0fd6aea1
MN
3510 case FF_CMP_DCTMAX:
3511 cmp[i]= c->dct_max[i];
3512 break;
622348f9
MN
3513 case FF_CMP_PSNR:
3514 cmp[i]= c->quant_psnr[i];
3515 break;
3516 case FF_CMP_BIT:
3517 cmp[i]= c->bit[i];
3518 break;
3519 case FF_CMP_RD:
3520 cmp[i]= c->rd[i];
3521 break;
3522 case FF_CMP_VSAD:
3523 cmp[i]= c->vsad[i];
3524 break;
3525 case FF_CMP_VSSE:
3526 cmp[i]= c->vsse[i];
3527 break;
3528 case FF_CMP_ZERO:
3529 cmp[i]= zero_cmp;
3530 break;
e6a2ac34
MN
3531 case FF_CMP_NSSE:
3532 cmp[i]= c->nsse[i];
3533 break;
b250f9c6 3534#if CONFIG_SNOW_ENCODER
26efc54e
MN
3535 case FF_CMP_W53:
3536 cmp[i]= c->w53[i];
3537 break;
3538 case FF_CMP_W97:
3539 cmp[i]= c->w97[i];
3540 break;
3a6fc8fa 3541#endif
622348f9
MN
3542 default:
3543 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
3544 }
3545 }
3546}
3547
5fecfb7d
LM
3548static void clear_block_c(DCTELEM *block)
3549{
3550 memset(block, 0, sizeof(DCTELEM)*64);
3551}
3552
2a5700de
MN
3553/**
3554 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
3555 */
eb4b3dd3 3556static void clear_blocks_c(DCTELEM *blocks)
649c00c9
MN
3557{
3558 memset(blocks, 0, sizeof(DCTELEM)*6*64);
3559}