/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 prediction functions.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "bit_depth_template.c"
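
/*
 * These templates are compiled once per supported bit depth by the file that
 * includes them; the pixel, pixel4 and dctcoef types as well as FUNC/FUNCC,
 * AV_RN4PA/AV_WN4PA and PIXEL_SPLAT_X4 come from bit_depth_template.c.
 * Every predictor fills one intra block at src from its already-decoded
 * neighbours; the _stride arguments are in bytes and are converted to pixel
 * units inside each function.
 */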
static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a = AV_RN4PA(src-stride);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}

static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
    AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
    AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
    AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
}

static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int dc = (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                    + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}

static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int dc = (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}

static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int dc = (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}

static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}

static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}

static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}

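/* Load the reconstructed neighbours of the current 4x4 block into local
 * variables: t0-t7 are the samples above (and above-right) the block,
 * l0-l7 the samples to the left (and below-left). */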
#define LOAD_TOP_RIGHT_EDGE\
    const unsigned av_unused t4 = topright[0];\
    const unsigned av_unused t5 = topright[1];\
    const unsigned av_unused t6 = topright[2];\
    const unsigned av_unused t7 = topright[3];\

#define LOAD_DOWN_LEFT_EDGE\
    const unsigned av_unused l4 = src[-1+4*stride];\
    const unsigned av_unused l5 = src[-1+5*stride];\
    const unsigned av_unused l6 = src[-1+6*stride];\
    const unsigned av_unused l7 = src[-1+7*stride];\

#define LOAD_LEFT_EDGE\
    const unsigned av_unused l0 = src[-1+0*stride];\
    const unsigned av_unused l1 = src[-1+1*stride];\
    const unsigned av_unused l2 = src[-1+2*stride];\
    const unsigned av_unused l3 = src[-1+3*stride];\

#define LOAD_TOP_EDGE\
    const unsigned av_unused t0 = src[ 0-1*stride];\
    const unsigned av_unused t1 = src[ 1-1*stride];\
    const unsigned av_unused t2 = src[ 2-1*stride];\
    const unsigned av_unused t3 = src[ 3-1*stride];\

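/* Directional 4x4 predictors: each output pixel is a 2- or 3-tap rounded
 * combination of the loaded edge samples, interpolated along the prediction
 * direction defined by H.264. */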
static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int lt= src[-1-1*stride];
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[0+2*stride]=
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[0+1*stride]=
    src[1+2*stride]=
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[0+0*stride]=
    src[1+1*stride]=
    src[2+2*stride]=
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+0*stride]=
    src[2+1*stride]=
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+0*stride]=
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
}

static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
    pixel *src = (pixel*)_src;
    const pixel *topright = (const pixel*)_topright;
    int stride = _stride/sizeof(pixel);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
}

static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int lt= src[-1-1*stride];
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[1+0*stride]=
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[2+0*stride]=
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[0+1*stride]=
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+1*stride]=
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+1*stride]=
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
}

static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
    pixel *src = (pixel*)_src;
    const pixel *topright = (const pixel*)_topright;
    int stride = _stride/sizeof(pixel);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4 + 1)>>1;
    src[3+2*stride]=(t4 + t5 + 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}

static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    LOAD_LEFT_EDGE

    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[2+0*stride]=
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[3+0*stride]=
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[2+1*stride]=
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[3+1*stride]=
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
    src[3+2*stride]=
    src[1+3*stride]=
    src[0+3*stride]=
    src[2+2*stride]=
    src[2+3*stride]=
    src[3+3*stride]=l3;
}

static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const int lt= src[-1-1*stride];
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[1+0*stride]=
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+1*stride]=
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[1+1*stride]=
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+2*stride]=
    src[2+3*stride]=(l1 + l2 + 1)>>1;
    src[1+2*stride]=
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}

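/* 16x16 luma predictors: vertical, horizontal, the DC variants and the
 * plane (gradient) mode. */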
static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
    const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
    const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);

    for(i=0; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
        AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
        AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
    }
}

static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
    int i;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    for(i=0; i<16; i++){
        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);

        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
    }
}

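/* Fill all 16 rows of the block with the splatted DC value v; the caller
 * declares the loop counter i, and src is advanced one row per iteration. */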
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4PA(src+ 0, v);\
        AV_WN4PA(src+ 4, v);\
        AV_WN4PA(src+ 8, v);\
        AV_WN4PA(src+12, v);\
        src += stride;\
    }

static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride /= sizeof(pixel);

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
    PREDICT_16x16_DC(dcsplat);
}

static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride /= sizeof(pixel);

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}

static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride /= sizeof(pixel);

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}

#define PRED16x16_X(n, v) \
static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\
    int i;\
    pixel *src = (pixel*)_src;\
    stride /= sizeof(pixel);\
    PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
}

PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1);
PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0);
PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1);

static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){
    int i, j, k;
    int a;
    INIT_CLIP
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel * const src0 = src +7-stride;
    const pixel *       src1 = src +8*stride-1;
    const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    for(k=2; k<=8; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    if(svq3){
        H = ( 5*(H/4) ) / 16;
        V = ( 5*(V/4) ) / 16;

        /* required for 100% accuracy */
        i = H; H = V; V = i;
    }else if(rv40){
        H = ( H + (H>>2) ) >> 4;
        V = ( V + (V>>2) ) >> 4;
    }else{
        H = ( 5*H+32 ) >> 6;
        V = ( 5*V+32 ) >> 6;
    }

    a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
    for(j=16; j>0; --j) {
        int b = a;
        a += V;
        for(i=-16; i<0; i+=4) {
            src[16+i] = CLIP((b    ) >> 5);
            src[17+i] = CLIP((b+  H) >> 5);
            src[18+i] = CLIP((b+2*H) >> 5);
            src[19+i] = CLIP((b+3*H) >> 5);
            b += 4*H;
        }
        src += stride;
    }
}

static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
}

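/* 8x8 chroma predictors.  The DC modes average the top and/or left
 * neighbours separately for each 4x4 quadrant, as required by H.264 chroma
 * DC prediction. */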
static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);

    for(i=0; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
    }
}

static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
    int i;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    for(i=0; i<8; i++){
        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
    }
}

#define PRED8x8_X(n, v)\
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
    int i;\
    const pixel4 a = PIXEL_SPLAT_X4(v);\
    pixel *src = (pixel*)_src;\
    stride /= sizeof(pixel);\
    for(i=0; i<8; i++){\
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
    }\
}

PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);

static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
    int i;
    int dc0, dc2;
    pixel4 dc0splat, dc2splat;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    dc0=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
    }
}

static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
    int i;
    int dc0, dc1;
    pixel4 dc0splat, dc1splat;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
}

static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
    int i;
    int dc0, dc1, dc2;
    pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
    pixel *src = (pixel*)_src;
    stride /= sizeof(pixel);

    dc0=dc1=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
    }
}

//the following 4 functions should not be optimized!
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
    FUNCC(pred8x8_top_dc)(src, stride);
    FUNCC(pred4x4_dc)(src, NULL, stride);
}

static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
    FUNCC(pred8x8_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}

static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}

static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}

static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
    int j, k;
    int a;
    INIT_CLIP
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    const pixel * const src0 = src +3-stride;
    const pixel *       src1 = src +4*stride-1;
    const pixel *       src2 = src1-2*stride;    // == src+2*stride-1;
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    for(k=2; k<=4; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    H = ( 17*H+16 ) >> 5;
    V = ( 17*V+16 ) >> 5;

    a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
    for(j=8; j>0; --j) {
        int b = a;
        a += V;
        src[0] = CLIP((b    ) >> 5);
        src[1] = CLIP((b+  H) >> 5);
        src[2] = CLIP((b+2*H) >> 5);
        src[3] = CLIP((b+3*H) >> 5);
        src[4] = CLIP((b+4*H) >> 5);
        src[5] = CLIP((b+5*H) >> 5);
        src[6] = CLIP((b+6*H) >> 5);
        src[7] = CLIP((b+7*H) >> 5);
        src += stride;
    }
}

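/* 8x8 luma predictors (High profile).  The PREDICT_8x8_LOAD_* macros below
 * first smooth the edge samples with a (1,2,1) filter; has_topleft and
 * has_topright indicate whether those neighbours are available. */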
#define SRC(x,y) src[(x)+(y)*stride]

#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if (has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        AV_WN4PA(((pixel4*)src)+0, v); \
        AV_WN4PA(((pixel4*)src)+1, v); \
        src += stride; \
    }

static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
}

static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_LEFT;
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
    PREDICT_8x8_DC(dc);
}

static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_TOP;
    const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
    PREDICT_8x8_DC(dc);
}

static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);

    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOP;
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
                                      +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
    PREDICT_8x8_DC(dc);
}

static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    pixel4 a;

    PREDICT_8x8_LOAD_LEFT;
#define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
               AV_WN4PA(src+y*stride, a); \
               AV_WN4PA(src+y*stride+4, a);
    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}

static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    int y;
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    pixel4 a, b;

    PREDICT_8x8_LOAD_TOP;
    src[0] = t0;
    src[1] = t1;
    src[2] = t2;
    src[3] = t3;
    src[4] = t4;
    src[5] = t5;
    src[6] = t6;
    src[7] = t7;
    a = AV_RN4PA(((pixel4*)src)+0);
    b = AV_RN4PA(((pixel4*)src)+1);
    for( y = 1; y < 8; y++ ) {
        AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
    }
}

static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
}

static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
}

static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(7,0)= (t6 + t7 + 1) >> 1;
}

static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l6 + l7 + 1) >> 1;
    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}

static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + t1 + 1) >> 1;
    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(7,6)= (t10 + t11 + 1) >> 1;
    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}

static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride/sizeof(pixel);
    PREDICT_8x8_LOAD_LEFT;
    SRC(0,0)= (l0 + l1 + 1) >> 1;
    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
}

#undef PREDICT_8x8_LOAD_LEFT
#undef PREDICT_8x8_LOAD_TOP
#undef PREDICT_8x8_LOAD_TOPLEFT
#undef PREDICT_8x8_LOAD_TOPRIGHT
#undef PREDICT_8x8_DC

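/* Prediction with residual: each *_add function starts from the neighbouring
 * sample above a column (vertical) or left of a row (horizontal) and
 * accumulates the transform coefficients from block on top of it, one
 * column or row at a time. */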
static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    pix -= stride;
    for(i=0; i<4; i++){
        pixel v = pix[0];
        pix[1*stride]= v += block[0];
        pix[2*stride]= v += block[4];
        pix[3*stride]= v += block[8];
        pix[4*stride]= v +  block[12];
        pix++;
        block++;
    }
}

static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    for(i=0; i<4; i++){
        pixel v = pix[-1];
        pix[0]= v += block[0];
        pix[1]= v += block[1];
        pix[2]= v += block[2];
        pix[3]= v +  block[3];
        pix+= stride;
        block+= 4;
    }
}

static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    pix -= stride;
    for(i=0; i<8; i++){
        pixel v = pix[0];
        pix[1*stride]= v += block[0];
        pix[2*stride]= v += block[8];
        pix[3*stride]= v += block[16];
        pix[4*stride]= v += block[24];
        pix[5*stride]= v += block[32];
        pix[6*stride]= v += block[40];
        pix[7*stride]= v += block[48];
        pix[8*stride]= v +  block[56];
        pix++;
        block++;
    }
}

static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
    int i;
    pixel *pix = (pixel*)_pix;
    const dctcoef *block = (const dctcoef*)_block;
    stride /= sizeof(pixel);
    for(i=0; i<8; i++){
        pixel v = pix[-1];
        pix[0]= v += block[0];
        pix[1]= v += block[1];
        pix[2]= v += block[2];
        pix[3]= v += block[3];
        pix[4]= v += block[4];
        pix[5]= v += block[5];
        pix[6]= v += block[6];
        pix[7]= v +  block[7];
        pix+= stride;
        block+= 8;
    }
}

static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<16; i++)
        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}

static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<16; i++)
        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}

static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<4; i++)
        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}

static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int i;
    for(i=0; i<4; i++)
        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}