/*
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/dsputil.c
 * DSP utils
 */
#include "simple_idct.h"
#include "mpegvideo.h"
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
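
/* Illustrative sketch (helper name is hypothetical, not part of the original
 * file): ~0UL/255 yields 0x01010101 on 32-bit longs and 0x0101010101010101 on
 * 64-bit longs, so multiplying it by a byte value replicates that byte into
 * every byte lane of the machine word. */
static inline unsigned long replicate_byte_demo(unsigned char v)
{
    return (~0UL / 255) * v;   /* e.g. v = 0x7f reproduces pb_7f */
}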
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
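
/* Illustrative sketch (hypothetical helper): ff_zigzag_direct maps scan
 * position to raster position, so restoring an 8x8 raster block from a run of
 * coefficients decoded in zigzag order is a single indexed copy. */
static inline void dezigzag_demo(int16_t raster[64], const int16_t scanned[64])
{
    int i;
    for (i = 0; i < 64; i++)
        raster[ff_zigzag_direct[i]] = scanned[i]; /* i-th coeff in scan order */
}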
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
/* not permuted inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
 * for a>16909558, it is an overestimate by less than 1 part in 1<<24 */
const uint32_t ff_inverse[257]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
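
/* Illustrative sketch (hypothetical helper): the table turns division into a
 * multiply and a shift, the trick behind FFmpeg's FASTDIV macro.  Per the
 * comment above, the result is exact for 0<=a<=16909558 and 2<=b<=256. */
static inline uint32_t fast_div_demo(uint32_t a, uint32_t b)
{
    return (uint32_t)(((uint64_t)a * ff_inverse[b]) >> 32); /* == a/b in range */
}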
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
    }

    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j > end) end=j;
        st->raster_end[i]= end;
    }
}
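
/* Illustrative usage sketch (disabled; the identity permutation stands in for
 * the IDCT-specific permutation chosen at init time): */
#if 0
    uint8_t identity[64];
    ScanTable st;
    int k;
    for (k = 0; k < 64; k++)
        identity[k] = k;
    ff_init_scantable(identity, &st, ff_zigzag_direct);
    /* st.permutated[i] == ff_zigzag_direct[i]; st.raster_end[i] is the
     * highest raster index reached in the first i+1 scan positions. */
#endif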
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if LONG_MAX > 2147483647
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= bswap_32(src[i+0]);
        dst[i+1]= bswap_32(src[i+1]);
        dst[i+2]= bswap_32(src[i+2]);
        dst[i+3]= bswap_32(src[i+3]);
        dst[i+4]= bswap_32(src[i+4]);
        dst[i+5]= bswap_32(src[i+5]);
        dst[i+6]= bswap_32(src[i+6]);
        dst[i+7]= bswap_32(src[i+7]);
    }
    for(;i<w; i++){
        dst[i+0]= bswap_32(src[i+0]);
    }
}
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
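
/* Illustrative sketch (hypothetical helper): ff_squareTbl holds x*x for x in
 * [-256, 255], so biasing the pointer by +256 lets the sse*_c routines square
 * a signed byte difference with one table lookup, no abs() and no branch. */
static inline uint32_t squared_diff_demo(uint8_t a, uint8_t b)
{
    const uint32_t *sq = ff_squareTbl + 256; /* centre of the table */
    return sq[a - b];                        /* == (a-b)*(a-b) */
}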
#if CONFIG_SNOW_ENCODER //dwt is in snow.c
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            {268, 239, 239, 213},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
        }
      },{
        {
            {275, 245, 245, 218},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
        }
      }
    };

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(v);
                }
            }
        }
    }
    return s>>9;
}
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}

static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}

static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
#endif
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        /* top and bottom */
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    if(src_y>= h){
        src += (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src += (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src += (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src += (1-block_w-src_x);
        src_x=1-block_w;
    }

    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    for(y=0; y<block_h; y++){
        //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

        //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = pixels[0];
        block[1] = pixels[1];
        block[2] = pixels[2];
        block[3] = pixels[3];
        block[4] = pixels[4];
        block[5] = pixels[5];
        block[6] = pixels[6];
        block[7] = pixels[7];
        pixels += line_size;
        block += 8;
    }
}
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];

        pixels += line_size;
        block += 8;
    }
}
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];

        pixels += line_size;
        block += 8;
    }
}
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];

        pixels += line_size;
        block += 8;
    }
}
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}
static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                    int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = block[0];
        pixels[1] = block[1];
        pixels[2] = block[2];
        pixels[3] = block[3];
        pixels[4] = block[4];
        pixels[5] = block[5];
        pixels[6] = block[6];
        pixels[7] = block[7];

        pixels += line_size;
        block += 8;
    }
}
*block
, uint8_t *restrict pixels
,
670 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
672 /* read the pixels */
674 pixels
[0] = cm
[pixels
[0] + block
[0]];
675 pixels
[1] = cm
[pixels
[1] + block
[1]];
676 pixels
[2] = cm
[pixels
[2] + block
[2]];
677 pixels
[3] = cm
[pixels
[3] + block
[3]];
678 pixels
[4] = cm
[pixels
[4] + block
[4]];
679 pixels
[5] = cm
[pixels
[5] + block
[5]];
680 pixels
[6] = cm
[pixels
[6] + block
[6]];
681 pixels
[7] = cm
[pixels
[7] + block
[7]];
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<8;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<4;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels += line_size;
        block += 4;
    }
}
static int sum_abs_dctelem_c(DCTELEM *block)
{
    int sum=0, i;
    for(i=0; i<64; i++)
        sum += FFABS(block[i]);
    return sum;
}
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;
    uint16_t *dst1 = (uint16_t *) dst;
    uint16_t *dst2 = (uint16_t *)(dst + linesize);

    for (j = 0; j < 8; j++) {
        for (i = 0; i < 8; i++) {
            dst1[i] = dst2[i] = src[i] * 0x0101;
        }
        src  += 8;
        dst1 += linesize;
        dst2 += linesize;
    }
}
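
/* Illustrative sketch (hypothetical helper): multiplying a byte by 0x0101
 * stores it into both bytes of a uint16_t, which is how scale_block_c doubles
 * each source pixel horizontally while the dst1/dst2 pair doubles it
 * vertically. */
static inline uint16_t pixel_pair_demo(uint8_t v)
{
    return (uint16_t)(v * 0x0101); /* two horizontally adjacent copies of v */
}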
#if 0
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL)\
              + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0= (a&0x0303030303030303ULL)\
              + (b&0x0303030303030303ULL)\
              + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

#define op_avg(a, b) a = rnd_avg32(a, b)
#endif
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
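
/* Illustrative sketch (hypothetical helper): rnd_avg32/no_rnd_avg32 rely on
 * the per-byte identities (a|b) - (((a^b)&~LSB)>>1) == (a+b+1)>>1 and
 * (a&b) + (((a^b)&~LSB)>>1) == (a+b)>>1, i.e. four avg2()-style averages per
 * 32-bit word without unpacking the bytes. */
static inline uint32_t rnd_avg32_demo(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & 0xFEFEFEFEUL) >> 1); /* rounding-up average */
}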
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
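
/* Illustrative note (hypothetical helper): the four gmc1_c weights form a
 * bilinear kernel in 1/16-pel units and always sum to 256, which is why the
 * +rounder and >>8 renormalise the interpolated sample. */
static inline int gmc1_weight_sum_demo(int x16, int y16)
{
    return (16-x16)*(16-y16) + x16*(16-y16) + (16-x16)*y16 + x16*y16; /* 256 */
}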
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}

static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
, const uint8_t *src
, int stride
, int width
, int height
){
1503 case 2: avg_pixels2_c (dst
, src
, stride
, height
); break;
1504 case 4: avg_pixels4_c (dst
, src
, stride
, height
); break;
1505 case 8: avg_pixels8_c (dst
, src
, stride
, height
); break;
1506 case 16:avg_pixels16_c(dst
, src
, stride
, height
); break;
1510 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1512 for (i
=0; i
< height
; i
++) {
1513 for (j
=0; j
< width
; j
++) {
1514 dst
[j
] = (dst
[j
] + ((683*(2*src
[j
] + src
[j
+1] + 1)) >> 11) + 1) >> 1;
1521 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1523 for (i
=0; i
< height
; i
++) {
1524 for (j
=0; j
< width
; j
++) {
1525 dst
[j
] = (dst
[j
] + ((683*(src
[j
] + 2*src
[j
+1] + 1)) >> 11) + 1) >> 1;
1532 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1534 for (i
=0; i
< height
; i
++) {
1535 for (j
=0; j
< width
; j
++) {
1536 dst
[j
] = (dst
[j
] + ((683*(2*src
[j
] + src
[j
+stride
] + 1)) >> 11) + 1) >> 1;
1543 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1545 for (i
=0; i
< height
; i
++) {
1546 for (j
=0; j
< width
; j
++) {
1547 dst
[j
] = (dst
[j
] + ((2731*(4*src
[j
] + 3*src
[j
+1] + 3*src
[j
+stride
] + 2*src
[j
+stride
+1] + 6)) >> 15) + 1) >> 1;
1554 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1556 for (i
=0; i
< height
; i
++) {
1557 for (j
=0; j
< width
; j
++) {
1558 dst
[j
] = (dst
[j
] + ((2731*(3*src
[j
] + 2*src
[j
+1] + 4*src
[j
+stride
] + 3*src
[j
+stride
+1] + 6)) >> 15) + 1) >> 1;
1565 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1567 for (i
=0; i
< height
; i
++) {
1568 for (j
=0; j
< width
; j
++) {
1569 dst
[j
] = (dst
[j
] + ((683*(src
[j
] + 2*src
[j
+stride
] + 1)) >> 11) + 1) >> 1;
1576 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1578 for (i
=0; i
< height
; i
++) {
1579 for (j
=0; j
< width
; j
++) {
1580 dst
[j
] = (dst
[j
] + ((2731*(3*src
[j
] + 4*src
[j
+1] + 2*src
[j
+stride
] + 3*src
[j
+stride
+1] + 6)) >> 15) + 1) >> 1;
1587 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst
, const uint8_t *src
, int stride
, int width
, int height
){
1589 for (i
=0; i
< height
; i
++) {
1590 for (j
=0; j
< width
; j
++) {
1591 dst
[j
] = (dst
[j
] + ((2731*(2*src
[j
] + 3*src
[j
+1] + 3*src
[j
+stride
] + 4*src
[j
+stride
+1] + 6)) >> 15) + 1) >> 1;
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
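
/* Illustrative note (hypothetical helper): the chroma MC weights form a
 * 1/8-pel bilinear kernel with A+B+C+D == 64, which is why op_put adds 32
 * (half of 64) before the >>6 renormalisation. */
static inline int chroma_weight_sum_demo(int x, int y)
{
    return (8-x)*(8-y) + x*(8-y) + (8-x)*y + x*y; /* always 64 */
}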
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6));
        dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6));
        dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6));
        dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6));
        dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6));
        dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6));
        dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6));
        dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6));
        dst += stride;
        src += stride;
    }
}
1776 #define QPEL_MC(r, OPNAME, RND, OP) \
1777 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1778 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1782 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1783 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1784 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1785 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1786 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1787 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1788 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1789 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1795 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1797 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1801 const int src0= src[0*srcStride];\
1802 const int src1= src[1*srcStride];\
1803 const int src2= src[2*srcStride];\
1804 const int src3= src[3*srcStride];\
1805 const int src4= src[4*srcStride];\
1806 const int src5= src[5*srcStride];\
1807 const int src6= src[6*srcStride];\
1808 const int src7= src[7*srcStride];\
1809 const int src8= src[8*srcStride];\
1810 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1811 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1812 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1813 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1814 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1815 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1816 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1817 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1823 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1824 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1829 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1830 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1831 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1832 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1833 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1834 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1835 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1836 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1837 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1838 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1839 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1840 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1841 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1842 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1843 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1844 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1850 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1851 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1856 const int src0= src[0*srcStride];\
1857 const int src1= src[1*srcStride];\
1858 const int src2= src[2*srcStride];\
1859 const int src3= src[3*srcStride];\
1860 const int src4= src[4*srcStride];\
1861 const int src5= src[5*srcStride];\
1862 const int src6= src[6*srcStride];\
1863 const int src7= src[7*srcStride];\
1864 const int src8= src[8*srcStride];\
1865 const int src9= src[9*srcStride];\
1866 const int src10= src[10*srcStride];\
1867 const int src11= src[11*srcStride];\
1868 const int src12= src[12*srcStride];\
1869 const int src13= src[13*srcStride];\
1870 const int src14= src[14*srcStride];\
1871 const int src15= src[15*srcStride];\
1872 const int src16= src[16*srcStride];\
1873 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1874 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1875 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1876 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1877 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1878 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1879 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1880 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1881 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1882 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1883 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1884 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1885 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1886 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1887 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1888 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
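
/* The lowpass filter taps above sum to 32 (20+20-6-6+3+3-1-1), so op_put
 * normalizes with (x+16)>>5 while the no_rnd variants use (x+15)>>5 and
 * thus round halfway cases down, as MPEG-4 no-rounding MC requires;
 * op_avg additionally averages the result with the existing destination. */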
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
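
/* Each QPEL_MC() line above instantiates the complete set of
 * {put,avg}[_no_rnd_]qpel{8,16}_mcXY_c functions, where X and Y are the
 * horizontal and vertical quarter-pel offsets. As a sketch of what the
 * preprocessor produces, the first instantiation expands qpel8_mc10 to:
 *
 *     static void put_qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
 *         uint8_t half[64];
 *         put_mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);
 *         put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
 *     }
 *
 * i.e. a horizontal half-pel lowpass followed by an average with the
 * integer-pel samples to reach the quarter-pel position. */
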
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}
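
/* H264_MC() below builds the 16 quarter-pel positions mc00..mc33 for one
 * block size from the 6-tap half-pel planes above: mcX0/mc0Y average the
 * integer samples with a horizontal/vertical half-pel plane, mc22 is the
 * pure 2D half-pel case, and the remaining positions average two
 * intermediate planes, following the H.264 interpolation rules. */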
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]
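
/* A single 6-tap pass scales by 32 (20+20-5-5+1+1), hence the (x+16)>>5
 * rounding in op_put/op_avg. The hv path filters the 16-bit intermediate
 * a second time, scaling by 32*32, hence (x+512)>>10 in the op2_*
 * variants. */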
H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)

#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
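
/* H.264 weighted prediction. op_scale1 is the unidirectional case; the
 * spec's rounding term 1<<(log2_denom-1) and the offset are pre-folded
 * into `offset` in the macro below so a single shift finishes the
 * computation. op_scale2 is the bidirectional case, where ((offset+1)|1)
 * guarantees the rounding bit of the (log2_denom+1) shift is present. */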
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
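
/* WMV2 half-pel interpolation uses the 4-tap filter (-1, 9, 9, -1)/16:
 * (9*(b+c) - (a+d) + 8) >> 4, which has unit DC gain (9+9-1-1 = 16). */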
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}
#if CONFIG_CAVS_DECODER

void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
#if CONFIG_VC1_DECODER

void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    avg_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER */
void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
#if CONFIG_RV40_DECODER
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
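
/* Same (-1, 9, 9, -1)/16 filter as wmv2_mspel8_h_lowpass above, applied
 * along columns; w is the number of columns to process. */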
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0          ];
        const int src1 = src[  srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        dst++;
        src++;
    }
}
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
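
/* H.263 in-loop deblocking: d measures the step across the block edge;
 * d1 applies the standard "tent" nonlinearity (full correction for small
 * |d|, fading to zero beyond 2*strength) to the inner pixel pair, and d2
 * applies a smaller correction, clipped to half of |d1|, to the outer
 * pair. */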
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+  stride] = p3 + d2;
    }
    }
}

static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
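
/* H.261 loop filter: separable [1 2 1]/4 smoothing of an 8x8 block.
 * The first pass filters vertically into temp[] (rows 0 and 7 are copied,
 * pre-scaled by 4 to match the filtered rows); the second pass filters
 * horizontally, passing columns 0 and 7 through with only normalization. */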
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
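
/* H.264 in-loop deblocking, normal (bS < 4) luma filter: an edge segment
 * is filtered only if |p0-q0| < alpha and |p1-p0|, |q1-q0| < beta. The
 * core delta is clipped to +/-tc, where tc starts at tc0[i] and grows by
 * one for each side that is flat enough (|p2-p0| resp. |q2-q0| < beta) to
 * have its second pixel filtered as well; a negative tc0[i] skips the
 * segment entirely. */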
static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    if(tc0[i])
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    if(tc0[i])
                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
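
/* Strong (intra, bS == 4) luma filter: where the edge is sufficiently
 * flat, up to three pixels per side are replaced by short tap averages;
 * otherwise only p0/q0 receive the weaker 3-tap substitution. */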
static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 16; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                if( FFABS( p2 - p0 ) < beta)
                {
                    const int p3 = pix[-4*xstride];
                    /* p0', p1', p2' */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( FFABS( q2 - q0 ) < beta)
                {
                    const int q3 = pix[3*xstride];
                    /* q0', q1', q2' */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }else{
                /* p0', q0' */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;