2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include "rectangle.h"
40 // todo: make it possible to check for at least (i4x4 or split_mv)
41 // in one op. are others needed?
50 AVCodecContext
*avctx
;
54 vp8_mc_func put_pixels_tab
[3][3][3];
57 uint8_t *edge_emu_buffer
;
58 VP56RangeCoder c
; ///< header context, includes mb modes and motion vectors
61 int mb_width
; /* number of horizontal MB */
62 int mb_height
; /* number of vertical MB */
68 int update_last
; ///< update VP56_FRAME_PREVIOUS with the current one
69 int update_golden
; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
74 * If this flag is not set, all the probability updates
75 * are discarded after this frame is decoded.
77 int update_probabilities
;
80 * All coefficients are contained in separate arith coding contexts.
81 * There can be 1, 2, 4, or 8 of these after the header context.
83 int num_coeff_partitions
;
84 VP56RangeCoder coeff_partition
[8];
86 VP8Macroblock
*macroblocks
;
87 VP8Macroblock
*macroblocks_base
;
88 VP8FilterStrength
*filter_strength
;
91 uint8_t *intra4x4_pred_mode
;
92 uint8_t *intra4x4_pred_mode_base
;
93 uint8_t *segmentation_map
;
97 * Cache of the top row needed for intra prediction
98 * 16 for luma, 8 for each chroma plane
100 uint8_t (*top_border
)[16+8+8];
103 * For coeff decode, we need to know whether the above block had non-zero
104 * coefficients. This means for each macroblock, we need data for 4 luma
105 * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
106 * per macroblock. We keep the last row in top_nnz.
108 uint8_t (*top_nnz
)[9];
109 DECLARE_ALIGNED(8, uint8_t, left_nnz
)[9];
112 * This is the index plus one of the last non-zero coeff
113 * for each of the blocks in the current macroblock.
115 * 1 -> dc-only (special transform)
116 * 2+-> full transform
118 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache
)[6][4];
119 DECLARE_ALIGNED(16, DCTELEM
, block
)[6][4][16];
120 uint8_t intra4x4_pred_mode_mb
[16];
122 int chroma_pred_mode
; ///< 8x8c pred mode of the current macroblock
123 int segment
; ///< segment of the current macroblock
126 int sign_bias
[4]; ///< one state [0, 1] per ref frame type
130 * Base parameters for segmentation, i.e. per-macroblock parameters.
131 * These must be kept unchanged even if segmentation is not used for
132 * a frame, since the values persist between interframes.
138 int8_t base_quant
[4];
139 int8_t filter_level
[4]; ///< base loop filter level
143 * Macroblocks can have one of 4 different quants in a frame when
144 * segmentation is enabled.
145 * If segmentation is disabled, only the first segment's values are used.
148 // [0] - DC qmul [1] - AC qmul
149 int16_t luma_qmul
[2];
150 int16_t luma_dc_qmul
[2]; ///< luma dc-only block quant
151 int16_t chroma_qmul
[2];
161 int enabled
; ///< whether each mb can have a different strength based on mode/ref
164 * filter strength adjustment for the following macroblock modes:
167 * [2] - inter modes except for zero or split mv
169 * i16x16 modes never have any adjustment
174 * filter strength adjustment for macroblocks that reference:
175 * [0] - intra / VP56_FRAME_CURRENT
176 * [1] - VP56_FRAME_PREVIOUS
177 * [2] - VP56_FRAME_GOLDEN
178 * [3] - altref / VP56_FRAME_GOLDEN2
184 * These are all of the updatable probabilities for binary decisions.
185 * They are only implicitly reset on keyframes, making it quite likely
186 * for an interframe to desync if a prior frame's header was corrupt
187 * or missing outright!
190 uint8_t segmentid
[3];
195 uint8_t pred16x16
[4];
197 uint8_t token
[4][8][3][NUM_DCT_TOKENS
-1];
202 static void vp8_decode_flush(AVCodecContext
*avctx
)
204 VP8Context
*s
= avctx
->priv_data
;
207 for (i
= 0; i
< 4; i
++)
208 if (s
->frames
[i
].data
[0])
209 avctx
->release_buffer(avctx
, &s
->frames
[i
]);
210 memset(s
->framep
, 0, sizeof(s
->framep
));
212 av_freep(&s
->macroblocks_base
);
213 av_freep(&s
->filter_strength
);
214 av_freep(&s
->intra4x4_pred_mode_base
);
215 av_freep(&s
->top_nnz
);
216 av_freep(&s
->edge_emu_buffer
);
217 av_freep(&s
->top_border
);
218 av_freep(&s
->segmentation_map
);
220 s
->macroblocks
= NULL
;
221 s
->intra4x4_pred_mode
= NULL
;
224 static int update_dimensions(VP8Context
*s
, int width
, int height
)
228 if (avcodec_check_dimensions(s
->avctx
, width
, height
))
229 return AVERROR_INVALIDDATA
;
231 vp8_decode_flush(s
->avctx
);
233 avcodec_set_dimensions(s
->avctx
, width
, height
);
235 s
->mb_width
= (s
->avctx
->coded_width
+15) / 16;
236 s
->mb_height
= (s
->avctx
->coded_height
+15) / 16;
238 // we allocate a border around the top/left of intra4x4 modes
239 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle
240 s
->mb_stride
= s
->mb_width
+1;
241 s
->b4_stride
= 4*s
->mb_stride
;
243 s
->macroblocks_base
= av_mallocz((s
->mb_stride
+s
->mb_height
*2+2)*sizeof(*s
->macroblocks
));
244 s
->filter_strength
= av_mallocz(s
->mb_stride
*sizeof(*s
->filter_strength
));
245 s
->intra4x4_pred_mode_base
= av_mallocz(s
->b4_stride
*(4*s
->mb_height
+1));
246 s
->top_nnz
= av_mallocz(s
->mb_width
*sizeof(*s
->top_nnz
));
247 s
->top_border
= av_mallocz((s
->mb_width
+1)*sizeof(*s
->top_border
));
248 s
->segmentation_map
= av_mallocz(s
->mb_stride
*s
->mb_height
);
250 if (!s
->macroblocks_base
|| !s
->filter_strength
|| !s
->intra4x4_pred_mode_base
||
251 !s
->top_nnz
|| !s
->top_border
|| !s
->segmentation_map
)
252 return AVERROR(ENOMEM
);
254 s
->macroblocks
= s
->macroblocks_base
+ 1;
255 s
->intra4x4_pred_mode
= s
->intra4x4_pred_mode_base
+ 4 + s
->b4_stride
;
257 memset(s
->intra4x4_pred_mode_base
, DC_PRED
, s
->b4_stride
);
258 for (i
= 0; i
< 4*s
->mb_height
; i
++)
259 s
->intra4x4_pred_mode
[i
*s
->b4_stride
-1] = DC_PRED
;
264 static void parse_segment_info(VP8Context
*s
)
266 VP56RangeCoder
*c
= &s
->c
;
269 s
->segmentation
.update_map
= vp8_rac_get(c
);
271 if (vp8_rac_get(c
)) { // update segment feature data
272 s
->segmentation
.absolute_vals
= vp8_rac_get(c
);
274 for (i
= 0; i
< 4; i
++)
275 s
->segmentation
.base_quant
[i
] = vp8_rac_get_sint(c
, 7);
277 for (i
= 0; i
< 4; i
++)
278 s
->segmentation
.filter_level
[i
] = vp8_rac_get_sint(c
, 6);
280 if (s
->segmentation
.update_map
)
281 for (i
= 0; i
< 3; i
++)
282 s
->prob
->segmentid
[i
] = vp8_rac_get(c
) ?
vp8_rac_get_uint(c
, 8) : 255;
285 static void update_lf_deltas(VP8Context
*s
)
287 VP56RangeCoder
*c
= &s
->c
;
290 for (i
= 0; i
< 4; i
++)
291 s
->lf_delta
.ref
[i
] = vp8_rac_get_sint(c
, 6);
293 for (i
= 0; i
< 4; i
++)
294 s
->lf_delta
.mode
[i
] = vp8_rac_get_sint(c
, 6);
297 static int setup_partitions(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
299 const uint8_t *sizes
= buf
;
302 s
->num_coeff_partitions
= 1 << vp8_rac_get_uint(&s
->c
, 2);
304 buf
+= 3*(s
->num_coeff_partitions
-1);
305 buf_size
-= 3*(s
->num_coeff_partitions
-1);
309 for (i
= 0; i
< s
->num_coeff_partitions
-1; i
++) {
310 int size
= AV_RL24(sizes
+ 3*i
);
311 if (buf_size
- size
< 0)
314 vp56_init_range_decoder(&s
->coeff_partition
[i
], buf
, size
);
318 vp56_init_range_decoder(&s
->coeff_partition
[i
], buf
, buf_size
);
323 static void get_quants(VP8Context
*s
)
325 VP56RangeCoder
*c
= &s
->c
;
328 int yac_qi
= vp8_rac_get_uint(c
, 7);
329 int ydc_delta
= vp8_rac_get_sint(c
, 4);
330 int y2dc_delta
= vp8_rac_get_sint(c
, 4);
331 int y2ac_delta
= vp8_rac_get_sint(c
, 4);
332 int uvdc_delta
= vp8_rac_get_sint(c
, 4);
333 int uvac_delta
= vp8_rac_get_sint(c
, 4);
335 for (i
= 0; i
< 4; i
++) {
336 if (s
->segmentation
.enabled
) {
337 base_qi
= s
->segmentation
.base_quant
[i
];
338 if (!s
->segmentation
.absolute_vals
)
343 s
->qmat
[i
].luma_qmul
[0] = vp8_dc_qlookup
[av_clip(base_qi
+ ydc_delta
, 0, 127)];
344 s
->qmat
[i
].luma_qmul
[1] = vp8_ac_qlookup
[av_clip(base_qi
, 0, 127)];
345 s
->qmat
[i
].luma_dc_qmul
[0] = 2 * vp8_dc_qlookup
[av_clip(base_qi
+ y2dc_delta
, 0, 127)];
346 s
->qmat
[i
].luma_dc_qmul
[1] = 155 * vp8_ac_qlookup
[av_clip(base_qi
+ y2ac_delta
, 0, 127)] / 100;
347 s
->qmat
[i
].chroma_qmul
[0] = vp8_dc_qlookup
[av_clip(base_qi
+ uvdc_delta
, 0, 127)];
348 s
->qmat
[i
].chroma_qmul
[1] = vp8_ac_qlookup
[av_clip(base_qi
+ uvac_delta
, 0, 127)];
350 s
->qmat
[i
].luma_dc_qmul
[1] = FFMAX(s
->qmat
[i
].luma_dc_qmul
[1], 8);
351 s
->qmat
[i
].chroma_qmul
[0] = FFMIN(s
->qmat
[i
].chroma_qmul
[0], 132);
356 * Determine which buffers golden and altref should be updated with after this frame.
357 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
359 * Intra frames update all 3 references
360 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
361 * If the update (golden|altref) flag is set, it's updated with the current frame
362 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
363 * If the flag is not set, the number read means:
365 * 1: VP56_FRAME_PREVIOUS
366 * 2: update golden with altref, or update altref with golden
368 static VP56Frame
ref_to_update(VP8Context
*s
, int update
, VP56Frame ref
)
370 VP56RangeCoder
*c
= &s
->c
;
373 return VP56_FRAME_CURRENT
;
375 switch (vp8_rac_get_uint(c
, 2)) {
377 return VP56_FRAME_PREVIOUS
;
379 return (ref
== VP56_FRAME_GOLDEN
) ? VP56_FRAME_GOLDEN2
: VP56_FRAME_GOLDEN
;
381 return VP56_FRAME_NONE
;
384 static void update_refs(VP8Context
*s
)
386 VP56RangeCoder
*c
= &s
->c
;
388 int update_golden
= vp8_rac_get(c
);
389 int update_altref
= vp8_rac_get(c
);
391 s
->update_golden
= ref_to_update(s
, update_golden
, VP56_FRAME_GOLDEN
);
392 s
->update_altref
= ref_to_update(s
, update_altref
, VP56_FRAME_GOLDEN2
);
395 static int decode_frame_header(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
397 VP56RangeCoder
*c
= &s
->c
;
398 int header_size
, hscale
, vscale
, i
, j
, k
, l
, ret
;
399 int width
= s
->avctx
->width
;
400 int height
= s
->avctx
->height
;
402 s
->keyframe
= !(buf
[0] & 1);
403 s
->profile
= (buf
[0]>>1) & 7;
404 s
->invisible
= !(buf
[0] & 0x10);
405 header_size
= AV_RL24(buf
) >> 5;
410 av_log(s
->avctx
, AV_LOG_WARNING
, "Unknown profile %d\n", s
->profile
);
413 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_epel_pixels_tab
, sizeof(s
->put_pixels_tab
));
414 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
415 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_bilinear_pixels_tab
, sizeof(s
->put_pixels_tab
));
417 if (header_size
> buf_size
- 7*s
->keyframe
) {
418 av_log(s
->avctx
, AV_LOG_ERROR
, "Header size larger than data provided\n");
419 return AVERROR_INVALIDDATA
;
423 if (AV_RL24(buf
) != 0x2a019d) {
424 av_log(s
->avctx
, AV_LOG_ERROR
, "Invalid start code 0x%x\n", AV_RL24(buf
));
425 return AVERROR_INVALIDDATA
;
427 width
= AV_RL16(buf
+3) & 0x3fff;
428 height
= AV_RL16(buf
+5) & 0x3fff;
429 hscale
= buf
[4] >> 6;
430 vscale
= buf
[6] >> 6;
434 if (hscale
|| vscale
)
435 av_log_missing_feature(s
->avctx
, "Upscaling", 1);
437 s
->update_golden
= s
->update_altref
= VP56_FRAME_CURRENT
;
438 memcpy(s
->prob
->token
, vp8_token_default_probs
, sizeof(s
->prob
->token
));
439 memcpy(s
->prob
->pred16x16
, vp8_pred16x16_prob_inter
, sizeof(s
->prob
->pred16x16
));
440 memcpy(s
->prob
->pred8x8c
, vp8_pred8x8c_prob_inter
, sizeof(s
->prob
->pred8x8c
));
441 memcpy(s
->prob
->mvc
, vp8_mv_default_prob
, sizeof(s
->prob
->mvc
));
442 memset(&s
->segmentation
, 0, sizeof(s
->segmentation
));
445 if (!s
->macroblocks_base
|| /* first frame */
446 width
!= s
->avctx
->width
|| height
!= s
->avctx
->height
) {
447 if ((ret
= update_dimensions(s
, width
, height
) < 0))
451 vp56_init_range_decoder(c
, buf
, header_size
);
453 buf_size
-= header_size
;
457 av_log(s
->avctx
, AV_LOG_WARNING
, "Unspecified colorspace\n");
458 vp8_rac_get(c
); // whether we can skip clamping in dsp functions
461 if ((s
->segmentation
.enabled
= vp8_rac_get(c
)))
462 parse_segment_info(s
);
464 s
->segmentation
.update_map
= 0; // FIXME: move this to some init function?
466 s
->filter
.simple
= vp8_rac_get(c
);
467 s
->filter
.level
= vp8_rac_get_uint(c
, 6);
468 s
->filter
.sharpness
= vp8_rac_get_uint(c
, 3);
470 if ((s
->lf_delta
.enabled
= vp8_rac_get(c
)))
474 if (setup_partitions(s
, buf
, buf_size
)) {
475 av_log(s
->avctx
, AV_LOG_ERROR
, "Invalid partitions\n");
476 return AVERROR_INVALIDDATA
;
483 s
->sign_bias
[VP56_FRAME_GOLDEN
] = vp8_rac_get(c
);
484 s
->sign_bias
[VP56_FRAME_GOLDEN2
/* altref */] = vp8_rac_get(c
);
487 // if we aren't saving this frame's probabilities for future frames,
488 // make a copy of the current probabilities
489 if (!(s
->update_probabilities
= vp8_rac_get(c
)))
490 s
->prob
[1] = s
->prob
[0];
492 s
->update_last
= s
->keyframe
|| vp8_rac_get(c
);
494 for (i
= 0; i
< 4; i
++)
495 for (j
= 0; j
< 8; j
++)
496 for (k
= 0; k
< 3; k
++)
497 for (l
= 0; l
< NUM_DCT_TOKENS
-1; l
++)
498 if (vp56_rac_get_prob_branchy(c
, vp8_token_update_probs
[i
][j
][k
][l
]))
499 s
->prob
->token
[i
][j
][k
][l
] = vp8_rac_get_uint(c
, 8);
501 if ((s
->mbskip_enabled
= vp8_rac_get(c
)))
502 s
->prob
->mbskip
= vp8_rac_get_uint(c
, 8);
505 s
->prob
->intra
= vp8_rac_get_uint(c
, 8);
506 s
->prob
->last
= vp8_rac_get_uint(c
, 8);
507 s
->prob
->golden
= vp8_rac_get_uint(c
, 8);
510 for (i
= 0; i
< 4; i
++)
511 s
->prob
->pred16x16
[i
] = vp8_rac_get_uint(c
, 8);
513 for (i
= 0; i
< 3; i
++)
514 s
->prob
->pred8x8c
[i
] = vp8_rac_get_uint(c
, 8);
516 // 17.2 MV probability update
517 for (i
= 0; i
< 2; i
++)
518 for (j
= 0; j
< 19; j
++)
519 if (vp56_rac_get_prob_branchy(c
, vp8_mv_update_prob
[i
][j
]))
520 s
->prob
->mvc
[i
][j
] = vp8_rac_get_nn(c
);
526 static av_always_inline
527 void clamp_mv(VP8Context
*s
, VP56mv
*dst
, const VP56mv
*src
, int mb_x
, int mb_y
)
529 #define MARGIN (16 << 2)
530 dst
->x
= av_clip(src
->x
, -((mb_x
<< 6) + MARGIN
),
531 ((s
->mb_width
- 1 - mb_x
) << 6) + MARGIN
);
532 dst
->y
= av_clip(src
->y
, -((mb_y
<< 6) + MARGIN
),
533 ((s
->mb_height
- 1 - mb_y
) << 6) + MARGIN
);
536 static av_always_inline
537 void find_near_mvs(VP8Context
*s
, VP8Macroblock
*mb
, int mb_x
, int mb_y
,
538 VP56mv near
[2], VP56mv
*best
, uint8_t cnt
[4])
540 VP8Macroblock
*mb_edge
[3] = { mb
+ 2 /* top */,
542 mb
+ 1 /* top-left */ };
543 enum { EDGE_TOP
, EDGE_LEFT
, EDGE_TOPLEFT
};
544 VP56mv near_mv
[4] = {{ 0 }};
545 enum { CNT_ZERO
, CNT_NEAREST
, CNT_NEAR
, CNT_SPLITMV
};
547 int best_idx
= CNT_ZERO
;
548 int cur_sign_bias
= s
->sign_bias
[mb
->ref_frame
];
549 int *sign_bias
= s
->sign_bias
;
551 /* Process MB on top, left and top-left */
552 #define MV_EDGE_CHECK(n)\
554 VP8Macroblock *edge = mb_edge[n];\
555 int edge_ref = edge->ref_frame;\
556 if (edge_ref != VP56_FRAME_CURRENT) {\
557 uint32_t mv = AV_RN32A(&edge->mv);\
559 if (cur_sign_bias != sign_bias[edge_ref]) {\
560 /* SWAR negate of the values in mv. */\
562 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
564 if (!n || mv != AV_RN32A(&near_mv[idx]))\
565 AV_WN32A(&near_mv[++idx], mv);\
566 cnt[idx] += 1 + (n != 2);\
568 cnt[CNT_ZERO] += 1 + (n != 2);\
575 /* If we have three distinct MVs, merge first and last if they're the same */
576 if (cnt
[CNT_SPLITMV
] && AV_RN32A(&near_mv
[1+EDGE_TOP
]) == AV_RN32A(&near_mv
[1+EDGE_TOPLEFT
]))
577 cnt
[CNT_NEAREST
] += 1;
579 cnt
[CNT_SPLITMV
] = ((mb_edge
[EDGE_LEFT
]->mode
== VP8_MVMODE_SPLIT
) +
580 (mb_edge
[EDGE_TOP
]->mode
== VP8_MVMODE_SPLIT
)) * 2 +
581 (mb_edge
[EDGE_TOPLEFT
]->mode
== VP8_MVMODE_SPLIT
);
583 /* Swap near and nearest if necessary */
584 if (cnt
[CNT_NEAR
] > cnt
[CNT_NEAREST
]) {
585 FFSWAP(uint8_t, cnt
[CNT_NEAREST
], cnt
[CNT_NEAR
]);
586 FFSWAP( VP56mv
, near_mv
[CNT_NEAREST
], near_mv
[CNT_NEAR
]);
589 /* Choose the best mv out of 0,0 and the nearest mv */
590 if (cnt
[CNT_NEAREST
] >= cnt
[CNT_ZERO
])
591 best_idx
= CNT_NEAREST
;
593 mb
->mv
= near_mv
[best_idx
];
594 near
[0] = near_mv
[CNT_NEAREST
];
595 near
[1] = near_mv
[CNT_NEAR
];
599 * Motion vector coding, 17.1.
601 static int read_mv_component(VP56RangeCoder
*c
, const uint8_t *p
)
605 if (vp56_rac_get_prob_branchy(c
, p
[0])) {
608 for (i
= 0; i
< 3; i
++)
609 x
+= vp56_rac_get_prob(c
, p
[9 + i
]) << i
;
610 for (i
= 9; i
> 3; i
--)
611 x
+= vp56_rac_get_prob(c
, p
[9 + i
]) << i
;
612 if (!(x
& 0xFFF0) || vp56_rac_get_prob(c
, p
[12]))
616 const uint8_t *ps
= p
+2;
617 bit
= vp56_rac_get_prob(c
, *ps
);
620 bit
= vp56_rac_get_prob(c
, *ps
);
623 x
+= vp56_rac_get_prob(c
, *ps
);
626 return (x
&& vp56_rac_get_prob(c
, p
[1])) ?
-x
: x
;
629 static av_always_inline
630 const uint8_t *get_submv_prob(uint32_t left
, uint32_t top
)
633 return vp8_submv_prob
[4-!!left
];
635 return vp8_submv_prob
[2];
636 return vp8_submv_prob
[1-!!left
];
640 * Split motion vector prediction, 16.4.
641 * @returns the number of motion vectors parsed (2, 4 or 16)
643 static av_always_inline
644 int decode_splitmvs(VP8Context
*s
, VP56RangeCoder
*c
, VP8Macroblock
*mb
)
646 int part_idx
= mb
->partitioning
=
647 vp8_rac_get_tree(c
, vp8_mbsplit_tree
, vp8_mbsplit_prob
);
648 int n
, num
= vp8_mbsplit_count
[part_idx
];
649 VP8Macroblock
*top_mb
= &mb
[2];
650 VP8Macroblock
*left_mb
= &mb
[-1];
651 const uint8_t *mbsplits_left
= vp8_mbsplits
[left_mb
->partitioning
],
652 *mbsplits_top
= vp8_mbsplits
[top_mb
->partitioning
],
653 *mbsplits_cur
= vp8_mbsplits
[part_idx
],
654 *firstidx
= vp8_mbfirstidx
[part_idx
];
655 VP56mv
*top_mv
= top_mb
->bmv
;
656 VP56mv
*left_mv
= left_mb
->bmv
;
657 VP56mv
*cur_mv
= mb
->bmv
;
659 for (n
= 0; n
< num
; n
++) {
661 uint32_t left
, above
;
662 const uint8_t *submv_prob
;
665 left
= AV_RN32A(&left_mv
[mbsplits_left
[k
+ 3]]);
667 left
= AV_RN32A(&cur_mv
[mbsplits_cur
[k
- 1]]);
669 above
= AV_RN32A(&top_mv
[mbsplits_top
[k
+ 12]]);
671 above
= AV_RN32A(&cur_mv
[mbsplits_cur
[k
- 4]]);
673 submv_prob
= get_submv_prob(left
, above
);
675 switch (vp8_rac_get_tree(c
, vp8_submv_ref_tree
, submv_prob
)) {
676 case VP8_SUBMVMODE_NEW4X4
:
677 mb
->bmv
[n
].y
= mb
->mv
.y
+ read_mv_component(c
, s
->prob
->mvc
[0]);
678 mb
->bmv
[n
].x
= mb
->mv
.x
+ read_mv_component(c
, s
->prob
->mvc
[1]);
680 case VP8_SUBMVMODE_ZERO4X4
:
681 AV_ZERO32(&mb
->bmv
[n
]);
683 case VP8_SUBMVMODE_LEFT4X4
:
684 AV_WN32A(&mb
->bmv
[n
], left
);
686 case VP8_SUBMVMODE_TOP4X4
:
687 AV_WN32A(&mb
->bmv
[n
], above
);
695 static av_always_inline
696 void decode_intra4x4_modes(VP56RangeCoder
*c
, uint8_t *intra4x4
,
697 int stride
, int keyframe
)
703 for (y
= 0; y
< 4; y
++) {
704 for (x
= 0; x
< 4; x
++) {
705 t
= intra4x4
[x
- stride
];
707 ctx
= vp8_pred4x4_prob_intra
[t
][l
];
708 intra4x4
[x
] = vp8_rac_get_tree(c
, vp8_pred4x4_tree
, ctx
);
713 for (i
= 0; i
< 16; i
++)
714 intra4x4
[i
] = vp8_rac_get_tree(c
, vp8_pred4x4_tree
, vp8_pred4x4_prob_inter
);
718 static av_always_inline
719 void decode_mb_mode(VP8Context
*s
, VP8Macroblock
*mb
, int mb_x
, int mb_y
,
720 uint8_t *intra4x4
, uint8_t *segment
)
722 VP56RangeCoder
*c
= &s
->c
;
724 if (s
->segmentation
.update_map
)
725 *segment
= vp8_rac_get_tree(c
, vp8_segmentid_tree
, s
->prob
->segmentid
);
726 s
->segment
= *segment
;
728 mb
->skip
= s
->mbskip_enabled ?
vp56_rac_get_prob(c
, s
->prob
->mbskip
) : 0;
731 mb
->mode
= vp8_rac_get_tree(c
, vp8_pred16x16_tree_intra
, vp8_pred16x16_prob_intra
);
733 if (mb
->mode
== MODE_I4x4
) {
734 decode_intra4x4_modes(c
, intra4x4
, s
->b4_stride
, 1);
736 fill_rectangle(intra4x4
, 4, 4, s
->b4_stride
, vp8_pred4x4_mode
[mb
->mode
], 1);
738 s
->chroma_pred_mode
= vp8_rac_get_tree(c
, vp8_pred8x8c_tree
, vp8_pred8x8c_prob_intra
);
739 mb
->ref_frame
= VP56_FRAME_CURRENT
;
740 } else if (vp56_rac_get_prob_branchy(c
, s
->prob
->intra
)) {
741 VP56mv near
[2], best
;
742 uint8_t cnt
[4] = { 0 };
746 if (vp56_rac_get_prob_branchy(c
, s
->prob
->last
))
747 mb
->ref_frame
= vp56_rac_get_prob(c
, s
->prob
->golden
) ?
748 VP56_FRAME_GOLDEN2
/* altref */ : VP56_FRAME_GOLDEN
;
750 mb
->ref_frame
= VP56_FRAME_PREVIOUS
;
751 s
->ref_count
[mb
->ref_frame
-1]++;
753 // motion vectors, 16.3
754 find_near_mvs(s
, mb
, mb_x
, mb_y
, near
, &best
, cnt
);
755 p
[0] = vp8_mode_contexts
[cnt
[0]][0];
756 p
[1] = vp8_mode_contexts
[cnt
[1]][1];
757 p
[2] = vp8_mode_contexts
[cnt
[2]][2];
758 p
[3] = vp8_mode_contexts
[cnt
[3]][3];
759 mb
->mode
= vp8_rac_get_tree(c
, vp8_pred16x16_tree_mvinter
, p
);
761 case VP8_MVMODE_SPLIT
:
762 clamp_mv(s
, &mb
->mv
, &mb
->mv
, mb_x
, mb_y
);
763 mb
->mv
= mb
->bmv
[decode_splitmvs(s
, c
, mb
) - 1];
765 case VP8_MVMODE_ZERO
:
768 case VP8_MVMODE_NEAREST
:
769 clamp_mv(s
, &mb
->mv
, &near
[0], mb_x
, mb_y
);
771 case VP8_MVMODE_NEAR
:
772 clamp_mv(s
, &mb
->mv
, &near
[1], mb_x
, mb_y
);
775 clamp_mv(s
, &mb
->mv
, &mb
->mv
, mb_x
, mb_y
);
776 mb
->mv
.y
+= + read_mv_component(c
, s
->prob
->mvc
[0]);
777 mb
->mv
.x
+= + read_mv_component(c
, s
->prob
->mvc
[1]);
780 if (mb
->mode
!= VP8_MVMODE_SPLIT
) {
781 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
786 mb
->mode
= vp8_rac_get_tree(c
, vp8_pred16x16_tree_inter
, s
->prob
->pred16x16
);
788 if (mb
->mode
== MODE_I4x4
)
789 decode_intra4x4_modes(c
, intra4x4
, 4, 0);
791 s
->chroma_pred_mode
= vp8_rac_get_tree(c
, vp8_pred8x8c_tree
, s
->prob
->pred8x8c
);
792 mb
->ref_frame
= VP56_FRAME_CURRENT
;
793 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
794 AV_ZERO32(&mb
->bmv
[0]);
799 * @param c arithmetic bitstream reader context
800 * @param block destination for block coefficients
801 * @param probs probabilities to use when reading trees from the bitstream
802 * @param i initial coeff index, 0 unless a separate DC block is coded
803 * @param zero_nhood the initial prediction context for number of surrounding
804 * all-zero blocks (only left/top, so 0-2)
805 * @param qmul array holding the dc/ac dequant factor at position 0/1
806 * @return 0 if no coeffs were decoded
807 * otherwise, the index of the last coeff decoded plus one
809 static int decode_block_coeffs(VP56RangeCoder
*c
, DCTELEM block
[16],
810 uint8_t probs
[8][3][NUM_DCT_TOKENS
-1],
811 int i
, int zero_nhood
, int16_t qmul
[2])
818 token_prob
= probs
[vp8_coeff_band
[i
]][zero_nhood
];
820 if (!vp56_rac_get_prob_branchy(c
, token_prob
[0])) // DCT_EOB
824 if (!vp56_rac_get_prob_branchy(c
, token_prob
[1])) { // DCT_0
826 token_prob
= probs
[vp8_coeff_band
[++i
]][0];
829 return nonzero
; // invalid input; blocks should end with EOB
832 if (!vp56_rac_get_prob_branchy(c
, token_prob
[2])) { // DCT_1
838 if (!vp56_rac_get_prob_branchy(c
, token_prob
[3])) { // DCT 2,3,4
839 coeff
= vp56_rac_get_prob(c
, token_prob
[4]);
841 coeff
+= vp56_rac_get_prob(c
, token_prob
[5]);
845 if (!vp56_rac_get_prob_branchy(c
, token_prob
[6])) {
846 if (!vp56_rac_get_prob_branchy(c
, token_prob
[7])) { // DCT_CAT1
847 coeff
= 5 + vp56_rac_get_prob(c
, vp8_dct_cat1_prob
[0]);
850 coeff
+= vp56_rac_get_prob(c
, vp8_dct_cat2_prob
[0]) << 1;
851 coeff
+= vp56_rac_get_prob(c
, vp8_dct_cat2_prob
[1]);
853 } else { // DCT_CAT3 and up
854 int a
= vp56_rac_get_prob(c
, token_prob
[8]);
855 int b
= vp56_rac_get_prob(c
, token_prob
[9+a
]);
856 int cat
= (a
<<1) + b
;
857 coeff
= 3 + (8<<cat
);
858 coeff
+= vp8_rac_get_coeff(c
, vp8_dct_cat_prob
[cat
]);
863 // todo: full [16] qmat? load into register?
864 block
[zigzag_scan
[i
]] = (vp8_rac_get(c
) ?
-coeff
: coeff
) * qmul
[!!i
];
871 static av_always_inline
872 void decode_mb_coeffs(VP8Context
*s
, VP56RangeCoder
*c
, VP8Macroblock
*mb
,
873 uint8_t t_nnz
[9], uint8_t l_nnz
[9])
875 LOCAL_ALIGNED_16(DCTELEM
, dc
,[16]);
876 int i
, x
, y
, luma_start
= 0, luma_ctx
= 3;
877 int nnz_pred
, nnz
, nnz_total
= 0;
878 int segment
= s
->segment
;
880 if (mb
->mode
!= MODE_I4x4
&& mb
->mode
!= VP8_MVMODE_SPLIT
) {
883 nnz_pred
= t_nnz
[8] + l_nnz
[8];
885 // decode DC values and do hadamard
886 nnz
= decode_block_coeffs(c
, dc
, s
->prob
->token
[1], 0, nnz_pred
,
887 s
->qmat
[segment
].luma_dc_qmul
);
888 l_nnz
[8] = t_nnz
[8] = !!nnz
;
890 s
->vp8dsp
.vp8_luma_dc_wht(s
->block
, dc
);
896 for (y
= 0; y
< 4; y
++)
897 for (x
= 0; x
< 4; x
++) {
898 nnz_pred
= l_nnz
[y
] + t_nnz
[x
];
899 nnz
= decode_block_coeffs(c
, s
->block
[y
][x
], s
->prob
->token
[luma_ctx
], luma_start
,
900 nnz_pred
, s
->qmat
[segment
].luma_qmul
);
901 // nnz+luma_start may be one more than the actual last index, but we don't care
902 s
->non_zero_count_cache
[y
][x
] = nnz
+ luma_start
;
903 t_nnz
[x
] = l_nnz
[y
] = !!nnz
;
908 // TODO: what to do about dimensions? 2nd dim for luma is x,
909 // but for chroma it's (y<<1)|x
910 for (i
= 4; i
< 6; i
++)
911 for (y
= 0; y
< 2; y
++)
912 for (x
= 0; x
< 2; x
++) {
913 nnz_pred
= l_nnz
[i
+2*y
] + t_nnz
[i
+2*x
];
914 nnz
= decode_block_coeffs(c
, s
->block
[i
][(y
<<1)+x
], s
->prob
->token
[2], 0,
915 nnz_pred
, s
->qmat
[segment
].chroma_qmul
);
916 s
->non_zero_count_cache
[i
][(y
<<1)+x
] = nnz
;
917 t_nnz
[i
+2*x
] = l_nnz
[i
+2*y
] = !!nnz
;
921 // if there were no coded coeffs despite the macroblock not being marked skip,
922 // we MUST not do the inner loop filter and should not do IDCT
923 // Since skip isn't used for bitstream prediction, just manually set it.
928 static av_always_inline
929 void backup_mb_border(uint8_t *top_border
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
,
930 int linesize
, int uvlinesize
, int simple
)
932 AV_COPY128(top_border
, src_y
+ 15*linesize
);
934 AV_COPY64(top_border
+16, src_cb
+ 7*uvlinesize
);
935 AV_COPY64(top_border
+24, src_cr
+ 7*uvlinesize
);
939 static av_always_inline
940 void xchg_mb_border(uint8_t *top_border
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
,
941 int linesize
, int uvlinesize
, int mb_x
, int mb_y
, int mb_width
,
942 int simple
, int xchg
)
944 uint8_t *top_border_m1
= top_border
-32; // for TL prediction
946 src_cb
-= uvlinesize
;
947 src_cr
-= uvlinesize
;
949 #define XCHG(a,b,xchg) do { \
950 if (xchg) AV_SWAP64(b,a); \
951 else AV_COPY64(b,a); \
954 XCHG(top_border_m1
+8, src_y
-8, xchg
);
955 XCHG(top_border
, src_y
, xchg
);
956 XCHG(top_border
+8, src_y
+8, 1);
957 if (mb_x
< mb_width
-1)
958 XCHG(top_border
+32, src_y
+16, 1);
960 // only copy chroma for normal loop filter
961 // or to initialize the top row to 127
962 if (!simple
|| !mb_y
) {
963 XCHG(top_border_m1
+16, src_cb
-8, xchg
);
964 XCHG(top_border_m1
+24, src_cr
-8, xchg
);
965 XCHG(top_border
+16, src_cb
, 1);
966 XCHG(top_border
+24, src_cr
, 1);
970 static av_always_inline
971 int check_intra_pred_mode(int mode
, int mb_x
, int mb_y
)
973 if (mode
== DC_PRED8x8
) {
975 mode
= mb_y ? TOP_DC_PRED8x8
: DC_128_PRED8x8
;
977 mode
= LEFT_DC_PRED8x8
;
983 static av_always_inline
984 void intra_predict(VP8Context
*s
, uint8_t *dst
[3], VP8Macroblock
*mb
,
985 uint8_t *intra4x4
, int mb_x
, int mb_y
)
987 int x
, y
, mode
, nnz
, tr
;
989 // for the first row, we need to run xchg_mb_border to init the top edge to 127
990 // otherwise, skip it if we aren't going to deblock
991 if (s
->deblock_filter
|| !mb_y
)
992 xchg_mb_border(s
->top_border
[mb_x
+1], dst
[0], dst
[1], dst
[2],
993 s
->linesize
, s
->uvlinesize
, mb_x
, mb_y
, s
->mb_width
,
994 s
->filter
.simple
, 1);
996 if (mb
->mode
< MODE_I4x4
) {
997 mode
= check_intra_pred_mode(mb
->mode
, mb_x
, mb_y
);
998 s
->hpc
.pred16x16
[mode
](dst
[0], s
->linesize
);
1000 uint8_t *ptr
= dst
[0];
1001 int stride
= s
->keyframe ? s
->b4_stride
: 4;
1003 // all blocks on the right edge of the macroblock use the bottom edge of
1004 // the top macroblock for their topright edge
1005 uint8_t *tr_right
= ptr
- s
->linesize
+ 16;
1007 // if we're on the right edge of the frame, said edge is extended
1008 // from the top macroblock
1009 if (mb_x
== s
->mb_width
-1) {
1010 tr
= tr_right
[-1]*0x01010101;
1011 tr_right
= (uint8_t *)&tr
;
1015 AV_ZERO128(s
->non_zero_count_cache
);
1017 for (y
= 0; y
< 4; y
++) {
1018 uint8_t *topright
= ptr
+ 4 - s
->linesize
;
1019 for (x
= 0; x
< 4; x
++) {
1021 topright
= tr_right
;
1023 s
->hpc
.pred4x4
[intra4x4
[x
]](ptr
+4*x
, topright
, s
->linesize
);
1025 nnz
= s
->non_zero_count_cache
[y
][x
];
1028 s
->vp8dsp
.vp8_idct_dc_add(ptr
+4*x
, s
->block
[y
][x
], s
->linesize
);
1030 s
->vp8dsp
.vp8_idct_add(ptr
+4*x
, s
->block
[y
][x
], s
->linesize
);
1035 ptr
+= 4*s
->linesize
;
1040 mode
= check_intra_pred_mode(s
->chroma_pred_mode
, mb_x
, mb_y
);
1041 s
->hpc
.pred8x8
[mode
](dst
[1], s
->uvlinesize
);
1042 s
->hpc
.pred8x8
[mode
](dst
[2], s
->uvlinesize
);
1044 if (s
->deblock_filter
|| !mb_y
)
1045 xchg_mb_border(s
->top_border
[mb_x
+1], dst
[0], dst
[1], dst
[2],
1046 s
->linesize
, s
->uvlinesize
, mb_x
, mb_y
, s
->mb_width
,
1047 s
->filter
.simple
, 0);
1051 * Generic MC function.
1053 * @param s VP8 decoding context
1054 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
1055 * @param dst target buffer for block data at block position
1056 * @param src reference picture buffer at origin (0, 0)
1057 * @param mv motion vector (relative to block position) to get pixel data from
1058 * @param x_off horizontal position of block from origin (0, 0)
1059 * @param y_off vertical position of block from origin (0, 0)
1060 * @param block_w width of block (16, 8 or 4)
1061 * @param block_h height of block (always same as block_w)
1062 * @param width width of src/dst plane data
1063 * @param height height of src/dst plane data
1064 * @param linesize size of a single line of plane data, including padding
1065 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1067 static av_always_inline
1068 void vp8_mc(VP8Context
*s
, int luma
,
1069 uint8_t *dst
, uint8_t *src
, const VP56mv
*mv
,
1070 int x_off
, int y_off
, int block_w
, int block_h
,
1071 int width
, int height
, int linesize
,
1072 vp8_mc_func mc_func
[3][3])
1075 static const uint8_t idx
[8] = { 0, 1, 2, 1, 2, 1, 2, 1 };
1076 int mx
= (mv
->x
<< luma
)&7, mx_idx
= idx
[mx
];
1077 int my
= (mv
->y
<< luma
)&7, my_idx
= idx
[my
];
1079 x_off
+= mv
->x
>> (3 - luma
);
1080 y_off
+= mv
->y
>> (3 - luma
);
1083 src
+= y_off
* linesize
+ x_off
;
1084 if (x_off
< 2 || x_off
>= width
- block_w
- 3 ||
1085 y_off
< 2 || y_off
>= height
- block_h
- 3) {
1086 ff_emulated_edge_mc(s
->edge_emu_buffer
, src
- 2 * linesize
- 2, linesize
,
1087 block_w
+ 5, block_h
+ 5,
1088 x_off
- 2, y_off
- 2, width
, height
);
1089 src
= s
->edge_emu_buffer
+ 2 + linesize
* 2;
1091 mc_func
[my_idx
][mx_idx
](dst
, linesize
, src
, linesize
, block_h
, mx
, my
);
1093 mc_func
[0][0](dst
, linesize
, src
+ y_off
* linesize
+ x_off
, linesize
, block_h
, 0, 0);
1096 static av_always_inline
1097 void vp8_mc_part(VP8Context
*s
, uint8_t *dst
[3],
1098 AVFrame
*ref_frame
, int x_off
, int y_off
,
1099 int bx_off
, int by_off
,
1100 int block_w
, int block_h
,
1101 int width
, int height
, VP56mv
*mv
)
1106 vp8_mc(s
, 1, dst
[0] + by_off
* s
->linesize
+ bx_off
,
1107 ref_frame
->data
[0], mv
, x_off
+ bx_off
, y_off
+ by_off
,
1108 block_w
, block_h
, width
, height
, s
->linesize
,
1109 s
->put_pixels_tab
[block_w
== 8]);
1112 if (s
->profile
== 3) {
1116 x_off
>>= 1; y_off
>>= 1;
1117 bx_off
>>= 1; by_off
>>= 1;
1118 width
>>= 1; height
>>= 1;
1119 block_w
>>= 1; block_h
>>= 1;
1120 vp8_mc(s
, 0, dst
[1] + by_off
* s
->uvlinesize
+ bx_off
,
1121 ref_frame
->data
[1], &uvmv
, x_off
+ bx_off
, y_off
+ by_off
,
1122 block_w
, block_h
, width
, height
, s
->uvlinesize
,
1123 s
->put_pixels_tab
[1 + (block_w
== 4)]);
1124 vp8_mc(s
, 0, dst
[2] + by_off
* s
->uvlinesize
+ bx_off
,
1125 ref_frame
->data
[2], &uvmv
, x_off
+ bx_off
, y_off
+ by_off
,
1126 block_w
, block_h
, width
, height
, s
->uvlinesize
,
1127 s
->put_pixels_tab
[1 + (block_w
== 4)]);
1130 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1131 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1132 static av_always_inline
void prefetch_motion(VP8Context
*s
, VP8Macroblock
*mb
, int mb_x
, int mb_y
, int mb_xy
, int ref
)
1134 /* Don't prefetch refs that haven't been used very often this frame. */
1135 if (s
->ref_count
[ref
-1] > (mb_xy
>> 5)) {
1136 int x_off
= mb_x
<< 4, y_off
= mb_y
<< 4;
1137 int mx
= mb
->mv
.x
+ x_off
+ 8;
1138 int my
= mb
->mv
.y
+ y_off
;
1139 uint8_t **src
= s
->framep
[ref
]->data
;
1140 int off
= mx
+ (my
+ (mb_x
&3)*4)*s
->linesize
+ 64;
1141 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
1142 off
= (mx
>>1) + ((my
>>1) + (mb_x
&7))*s
->uvlinesize
+ 64;
1143 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
1148 * Apply motion vectors to prediction buffer, chapter 18.
1150 static av_always_inline
1151 void inter_predict(VP8Context
*s
, uint8_t *dst
[3], VP8Macroblock
*mb
,
1154 int x_off
= mb_x
<< 4, y_off
= mb_y
<< 4;
1155 int width
= 16*s
->mb_width
, height
= 16*s
->mb_height
;
1156 AVFrame
*ref
= s
->framep
[mb
->ref_frame
];
1157 VP56mv
*bmv
= mb
->bmv
;
1159 if (mb
->mode
< VP8_MVMODE_SPLIT
) {
1160 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1161 0, 0, 16, 16, width
, height
, &mb
->mv
);
1162 } else switch (mb
->partitioning
) {
1163 case VP8_SPLITMVMODE_4x4
: {
1168 for (y
= 0; y
< 4; y
++) {
1169 for (x
= 0; x
< 4; x
++) {
1170 vp8_mc(s
, 1, dst
[0] + 4*y
*s
->linesize
+ x
*4,
1171 ref
->data
[0], &bmv
[4*y
+ x
],
1172 4*x
+ x_off
, 4*y
+ y_off
, 4, 4,
1173 width
, height
, s
->linesize
,
1174 s
->put_pixels_tab
[2]);
1179 x_off
>>= 1; y_off
>>= 1; width
>>= 1; height
>>= 1;
1180 for (y
= 0; y
< 2; y
++) {
1181 for (x
= 0; x
< 2; x
++) {
1182 uvmv
.x
= mb
->bmv
[ 2*y
* 4 + 2*x
].x
+
1183 mb
->bmv
[ 2*y
* 4 + 2*x
+1].x
+
1184 mb
->bmv
[(2*y
+1) * 4 + 2*x
].x
+
1185 mb
->bmv
[(2*y
+1) * 4 + 2*x
+1].x
;
1186 uvmv
.y
= mb
->bmv
[ 2*y
* 4 + 2*x
].y
+
1187 mb
->bmv
[ 2*y
* 4 + 2*x
+1].y
+
1188 mb
->bmv
[(2*y
+1) * 4 + 2*x
].y
+
1189 mb
->bmv
[(2*y
+1) * 4 + 2*x
+1].y
;
1190 uvmv
.x
= (uvmv
.x
+ 2 + (uvmv
.x
>> (INT_BIT
-1))) >> 2;
1191 uvmv
.y
= (uvmv
.y
+ 2 + (uvmv
.y
>> (INT_BIT
-1))) >> 2;
1192 if (s
->profile
== 3) {
1196 vp8_mc(s
, 0, dst
[1] + 4*y
*s
->uvlinesize
+ x
*4,
1197 ref
->data
[1], &uvmv
,
1198 4*x
+ x_off
, 4*y
+ y_off
, 4, 4,
1199 width
, height
, s
->uvlinesize
,
1200 s
->put_pixels_tab
[2]);
1201 vp8_mc(s
, 0, dst
[2] + 4*y
*s
->uvlinesize
+ x
*4,
1202 ref
->data
[2], &uvmv
,
1203 4*x
+ x_off
, 4*y
+ y_off
, 4, 4,
1204 width
, height
, s
->uvlinesize
,
1205 s
->put_pixels_tab
[2]);
1210 case VP8_SPLITMVMODE_16x8
:
1211 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1212 0, 0, 16, 8, width
, height
, &bmv
[0]);
1213 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1214 0, 8, 16, 8, width
, height
, &bmv
[1]);
1216 case VP8_SPLITMVMODE_8x16
:
1217 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1218 0, 0, 8, 16, width
, height
, &bmv
[0]);
1219 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1220 8, 0, 8, 16, width
, height
, &bmv
[1]);
1222 case VP8_SPLITMVMODE_8x8
:
1223 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1224 0, 0, 8, 8, width
, height
, &bmv
[0]);
1225 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1226 8, 0, 8, 8, width
, height
, &bmv
[1]);
1227 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1228 0, 8, 8, 8, width
, height
, &bmv
[2]);
1229 vp8_mc_part(s
, dst
, ref
, x_off
, y_off
,
1230 8, 8, 8, 8, width
, height
, &bmv
[3]);
1235 static av_always_inline
void idct_mb(VP8Context
*s
, uint8_t *dst
[3], VP8Macroblock
*mb
)
1239 if (mb
->mode
!= MODE_I4x4
) {
1240 uint8_t *y_dst
= dst
[0];
1241 for (y
= 0; y
< 4; y
++) {
1242 uint32_t nnz4
= AV_RN32A(s
->non_zero_count_cache
[y
]);
1244 if (nnz4
&~0x01010101) {
1245 for (x
= 0; x
< 4; x
++) {
1246 int nnz
= s
->non_zero_count_cache
[y
][x
];
1249 s
->vp8dsp
.vp8_idct_dc_add(y_dst
+4*x
, s
->block
[y
][x
], s
->linesize
);
1251 s
->vp8dsp
.vp8_idct_add(y_dst
+4*x
, s
->block
[y
][x
], s
->linesize
);
1255 s
->vp8dsp
.vp8_idct_dc_add4y(y_dst
, s
->block
[y
], s
->linesize
);
1258 y_dst
+= 4*s
->linesize
;
1262 for (ch
= 0; ch
< 2; ch
++) {
1263 uint32_t nnz4
= AV_RN32A(s
->non_zero_count_cache
[4+ch
]);
1265 uint8_t *ch_dst
= dst
[1+ch
];
1266 if (nnz4
&~0x01010101) {
1267 for (y
= 0; y
< 2; y
++) {
1268 for (x
= 0; x
< 2; x
++) {
1269 int nnz
= s
->non_zero_count_cache
[4+ch
][(y
<<1)+x
];
1272 s
->vp8dsp
.vp8_idct_dc_add(ch_dst
+4*x
, s
->block
[4+ch
][(y
<<1)+x
], s
->uvlinesize
);
1274 s
->vp8dsp
.vp8_idct_add(ch_dst
+4*x
, s
->block
[4+ch
][(y
<<1)+x
], s
->uvlinesize
);
1277 ch_dst
+= 4*s
->uvlinesize
;
1280 s
->vp8dsp
.vp8_idct_dc_add4uv(ch_dst
, s
->block
[4+ch
], s
->uvlinesize
);
1286 static av_always_inline
void filter_level_for_mb(VP8Context
*s
, VP8Macroblock
*mb
, VP8FilterStrength
*f
)
1288 int interior_limit
, filter_level
;
1290 if (s
->segmentation
.enabled
) {
1291 filter_level
= s
->segmentation
.filter_level
[s
->segment
];
1292 if (!s
->segmentation
.absolute_vals
)
1293 filter_level
+= s
->filter
.level
;
1295 filter_level
= s
->filter
.level
;
1297 if (s
->lf_delta
.enabled
) {
1298 filter_level
+= s
->lf_delta
.ref
[mb
->ref_frame
];
1300 if (mb
->ref_frame
== VP56_FRAME_CURRENT
) {
1301 if (mb
->mode
== MODE_I4x4
)
1302 filter_level
+= s
->lf_delta
.mode
[0];
1304 if (mb
->mode
== VP8_MVMODE_ZERO
)
1305 filter_level
+= s
->lf_delta
.mode
[1];
1306 else if (mb
->mode
== VP8_MVMODE_SPLIT
)
1307 filter_level
+= s
->lf_delta
.mode
[3];
1309 filter_level
+= s
->lf_delta
.mode
[2];
1312 filter_level
= av_clip(filter_level
, 0, 63);
1314 interior_limit
= filter_level
;
1315 if (s
->filter
.sharpness
) {
1316 interior_limit
>>= s
->filter
.sharpness
> 4 ?
2 : 1;
1317 interior_limit
= FFMIN(interior_limit
, 9 - s
->filter
.sharpness
);
1319 interior_limit
= FFMAX(interior_limit
, 1);
1321 f
->filter_level
= filter_level
;
1322 f
->inner_limit
= interior_limit
;
1323 f
->inner_filter
= !mb
->skip
|| mb
->mode
== MODE_I4x4
|| mb
->mode
== VP8_MVMODE_SPLIT
;
1326 static av_always_inline
void filter_mb(VP8Context
*s
, uint8_t *dst
[3], VP8FilterStrength
*f
, int mb_x
, int mb_y
)
1328 int mbedge_lim
, bedge_lim
, hev_thresh
;
1329 int filter_level
= f
->filter_level
;
1330 int inner_limit
= f
->inner_limit
;
1331 int inner_filter
= f
->inner_filter
;
1332 int linesize
= s
->linesize
;
1333 int uvlinesize
= s
->uvlinesize
;
1338 mbedge_lim
= 2*(filter_level
+2) + inner_limit
;
1339 bedge_lim
= 2* filter_level
+ inner_limit
;
1340 hev_thresh
= filter_level
>= 15;
1343 if (filter_level
>= 40)
1346 if (filter_level
>= 40)
1348 else if (filter_level
>= 20)
1353 s
->vp8dsp
.vp8_h_loop_filter16y(dst
[0], linesize
,
1354 mbedge_lim
, inner_limit
, hev_thresh
);
1355 s
->vp8dsp
.vp8_h_loop_filter8uv(dst
[1], dst
[2], uvlinesize
,
1356 mbedge_lim
, inner_limit
, hev_thresh
);
1360 s
->vp8dsp
.vp8_h_loop_filter16y_inner(dst
[0]+ 4, linesize
, bedge_lim
,
1361 inner_limit
, hev_thresh
);
1362 s
->vp8dsp
.vp8_h_loop_filter16y_inner(dst
[0]+ 8, linesize
, bedge_lim
,
1363 inner_limit
, hev_thresh
);
1364 s
->vp8dsp
.vp8_h_loop_filter16y_inner(dst
[0]+12, linesize
, bedge_lim
,
1365 inner_limit
, hev_thresh
);
1366 s
->vp8dsp
.vp8_h_loop_filter8uv_inner(dst
[1] + 4, dst
[2] + 4,
1367 uvlinesize
, bedge_lim
,
1368 inner_limit
, hev_thresh
);
1372 s
->vp8dsp
.vp8_v_loop_filter16y(dst
[0], linesize
,
1373 mbedge_lim
, inner_limit
, hev_thresh
);
1374 s
->vp8dsp
.vp8_v_loop_filter8uv(dst
[1], dst
[2], uvlinesize
,
1375 mbedge_lim
, inner_limit
, hev_thresh
);
1379 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0]+ 4*linesize
,
1380 linesize
, bedge_lim
,
1381 inner_limit
, hev_thresh
);
1382 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0]+ 8*linesize
,
1383 linesize
, bedge_lim
,
1384 inner_limit
, hev_thresh
);
1385 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0]+12*linesize
,
1386 linesize
, bedge_lim
,
1387 inner_limit
, hev_thresh
);
1388 s
->vp8dsp
.vp8_v_loop_filter8uv_inner(dst
[1] + 4 * uvlinesize
,
1389 dst
[2] + 4 * uvlinesize
,
1390 uvlinesize
, bedge_lim
,
1391 inner_limit
, hev_thresh
);
1395 static av_always_inline
void filter_mb_simple(VP8Context
*s
, uint8_t *dst
, VP8FilterStrength
*f
, int mb_x
, int mb_y
)
1397 int mbedge_lim
, bedge_lim
;
1398 int filter_level
= f
->filter_level
;
1399 int inner_limit
= f
->inner_limit
;
1400 int inner_filter
= f
->inner_filter
;
1401 int linesize
= s
->linesize
;
1406 mbedge_lim
= 2*(filter_level
+2) + inner_limit
;
1407 bedge_lim
= 2* filter_level
+ inner_limit
;
1410 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
, linesize
, mbedge_lim
);
1412 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 4, linesize
, bedge_lim
);
1413 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 8, linesize
, bedge_lim
);
1414 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+12, linesize
, bedge_lim
);
1418 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
, linesize
, mbedge_lim
);
1420 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 4*linesize
, linesize
, bedge_lim
);
1421 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 8*linesize
, linesize
, bedge_lim
);
1422 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+12*linesize
, linesize
, bedge_lim
);
1426 static void filter_mb_row(VP8Context
*s
, int mb_y
)
1428 VP8FilterStrength
*f
= s
->filter_strength
;
1430 s
->framep
[VP56_FRAME_CURRENT
]->data
[0] + 16*mb_y
*s
->linesize
,
1431 s
->framep
[VP56_FRAME_CURRENT
]->data
[1] + 8*mb_y
*s
->uvlinesize
,
1432 s
->framep
[VP56_FRAME_CURRENT
]->data
[2] + 8*mb_y
*s
->uvlinesize
1436 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++) {
1437 backup_mb_border(s
->top_border
[mb_x
+1], dst
[0], dst
[1], dst
[2], s
->linesize
, s
->uvlinesize
, 0);
1438 filter_mb(s
, dst
, f
++, mb_x
, mb_y
);
1445 static void filter_mb_row_simple(VP8Context
*s
, int mb_y
)
1447 VP8FilterStrength
*f
= s
->filter_strength
;
1448 uint8_t *dst
= s
->framep
[VP56_FRAME_CURRENT
]->data
[0] + 16*mb_y
*s
->linesize
;
1451 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++) {
1452 backup_mb_border(s
->top_border
[mb_x
+1], dst
, NULL
, NULL
, s
->linesize
, 0, 1);
1453 filter_mb_simple(s
, dst
, f
++, mb_x
, mb_y
);
1458 static int vp8_decode_frame(AVCodecContext
*avctx
, void *data
, int *data_size
,
1461 VP8Context
*s
= avctx
->priv_data
;
1462 int ret
, mb_x
, mb_y
, i
, y
, referenced
;
1463 enum AVDiscard skip_thresh
;
1464 AVFrame
*av_uninit(curframe
);
1466 if ((ret
= decode_frame_header(s
, avpkt
->data
, avpkt
->size
)) < 0)
1469 referenced
= s
->update_last
|| s
->update_golden
== VP56_FRAME_CURRENT
1470 || s
->update_altref
== VP56_FRAME_CURRENT
;
1472 skip_thresh
= !referenced ? AVDISCARD_NONREF
:
1473 !s
->keyframe ? AVDISCARD_NONKEY
: AVDISCARD_ALL
;
1475 if (avctx
->skip_frame
>= skip_thresh
) {
1479 s
->deblock_filter
= s
->filter
.level
&& avctx
->skip_loop_filter
< skip_thresh
;
1481 for (i
= 0; i
< 4; i
++)
1482 if (&s
->frames
[i
] != s
->framep
[VP56_FRAME_PREVIOUS
] &&
1483 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN
] &&
1484 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN2
]) {
1485 curframe
= s
->framep
[VP56_FRAME_CURRENT
] = &s
->frames
[i
];
1488 if (curframe
->data
[0])
1489 avctx
->release_buffer(avctx
, curframe
);
1491 curframe
->key_frame
= s
->keyframe
;
1492 curframe
->pict_type
= s
->keyframe ? FF_I_TYPE
: FF_P_TYPE
;
1493 curframe
->reference
= referenced ?
3 : 0;
1494 if ((ret
= avctx
->get_buffer(avctx
, curframe
))) {
1495 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed!\n");
1499 // Given that arithmetic probabilities are updated every frame, it's quite likely
1500 // that the values we have on a random interframe are complete junk if we didn't
1501 // start decode on a keyframe. So just don't display anything rather than junk.
1502 if (!s
->keyframe
&& (!s
->framep
[VP56_FRAME_PREVIOUS
] ||
1503 !s
->framep
[VP56_FRAME_GOLDEN
] ||
1504 !s
->framep
[VP56_FRAME_GOLDEN2
])) {
1505 av_log(avctx
, AV_LOG_WARNING
, "Discarding interframe without a prior keyframe!\n");
1506 return AVERROR_INVALIDDATA
;
1509 s
->linesize
= curframe
->linesize
[0];
1510 s
->uvlinesize
= curframe
->linesize
[1];
1512 if (!s
->edge_emu_buffer
)
1513 s
->edge_emu_buffer
= av_malloc(21*s
->linesize
);
1515 memset(s
->top_nnz
, 0, s
->mb_width
*sizeof(*s
->top_nnz
));
1517 /* Zero macroblock structures for top/left prediction from outside the frame. */
1518 memset(s
->macroblocks
, 0, (s
->mb_width
+ s
->mb_height
*2)*sizeof(*s
->macroblocks
));
1520 // top edge of 127 for intra prediction
1521 memset(s
->top_border
, 127, (s
->mb_width
+1)*sizeof(*s
->top_border
));
1522 memset(s
->ref_count
, 0, sizeof(s
->ref_count
));
1524 for (mb_y
= 0; mb_y
< s
->mb_height
; mb_y
++) {
1525 VP56RangeCoder
*c
= &s
->coeff_partition
[mb_y
& (s
->num_coeff_partitions
-1)];
1526 VP8Macroblock
*mb
= s
->macroblocks
+ (s
->mb_height
- mb_y
- 1)*2;
1527 uint8_t *intra4x4
= s
->intra4x4_pred_mode
+ 4*mb_y
*s
->b4_stride
;
1528 uint8_t *segment_map
= s
->segmentation_map
+ mb_y
*s
->mb_stride
;
1529 int mb_xy
= mb_y
* s
->mb_stride
;
1531 curframe
->data
[0] + 16*mb_y
*s
->linesize
,
1532 curframe
->data
[1] + 8*mb_y
*s
->uvlinesize
,
1533 curframe
->data
[2] + 8*mb_y
*s
->uvlinesize
1536 memset(s
->left_nnz
, 0, sizeof(s
->left_nnz
));
1538 // left edge of 129 for intra prediction
1539 if (!(avctx
->flags
& CODEC_FLAG_EMU_EDGE
))
1540 for (i
= 0; i
< 3; i
++)
1541 for (y
= 0; y
< 16>>!!i
; y
++)
1542 dst
[i
][y
*curframe
->linesize
[i
]-1] = 129;
1544 memset(s
->top_border
, 129, sizeof(*s
->top_border
));
1546 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++, mb_xy
++, mb
++) {
1547 uint8_t *intra4x4_mb
= s
->keyframe ? intra4x4
+ 4*mb_x
: s
->intra4x4_pred_mode_mb
;
1548 uint8_t *segment_mb
= segment_map
+mb_x
;
1550 /* Prefetch the current frame, 4 MBs ahead */
1551 s
->dsp
.prefetch(dst
[0] + (mb_x
&3)*4*s
->linesize
+ 64, s
->linesize
, 4);
1552 s
->dsp
.prefetch(dst
[1] + (mb_x
&7)*s
->uvlinesize
+ 64, dst
[2] - dst
[1], 2);
1554 decode_mb_mode(s
, mb
, mb_x
, mb_y
, intra4x4_mb
, segment_mb
);
1556 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_PREVIOUS
);
1559 decode_mb_coeffs(s
, c
, mb
, s
->top_nnz
[mb_x
], s
->left_nnz
);
1561 if (mb
->mode
<= MODE_I4x4
)
1562 intra_predict(s
, dst
, mb
, intra4x4_mb
, mb_x
, mb_y
);
1564 inter_predict(s
, dst
, mb
, mb_x
, mb_y
);
1566 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_GOLDEN
);
1569 idct_mb(s
, dst
, mb
);
1571 AV_ZERO64(s
->left_nnz
);
1572 AV_WN64(s
->top_nnz
[mb_x
], 0); // array of 9, so unaligned
1574 // Reset DC block predictors if they would exist if the mb had coefficients
1575 if (mb
->mode
!= MODE_I4x4
&& mb
->mode
!= VP8_MVMODE_SPLIT
) {
1577 s
->top_nnz
[mb_x
][8] = 0;
1581 if (s
->deblock_filter
)
1582 filter_level_for_mb(s
, mb
, &s
->filter_strength
[mb_x
]);
1584 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_GOLDEN2
);
1590 if (s
->deblock_filter
) {
1591 if (s
->filter
.simple
)
1592 filter_mb_row_simple(s
, mb_y
);
1594 filter_mb_row(s
, mb_y
);
1599 // if future frames don't use the updated probabilities,
1600 // reset them to the values we saved
1601 if (!s
->update_probabilities
)
1602 s
->prob
[0] = s
->prob
[1];
1604 // check if golden and altref are swapped
1605 if (s
->update_altref
== VP56_FRAME_GOLDEN
&&
1606 s
->update_golden
== VP56_FRAME_GOLDEN2
)
1607 FFSWAP(AVFrame
*, s
->framep
[VP56_FRAME_GOLDEN
], s
->framep
[VP56_FRAME_GOLDEN2
]);
1609 if (s
->update_altref
!= VP56_FRAME_NONE
)
1610 s
->framep
[VP56_FRAME_GOLDEN2
] = s
->framep
[s
->update_altref
];
1612 if (s
->update_golden
!= VP56_FRAME_NONE
)
1613 s
->framep
[VP56_FRAME_GOLDEN
] = s
->framep
[s
->update_golden
];
1616 if (s
->update_last
) // move cur->prev
1617 s
->framep
[VP56_FRAME_PREVIOUS
] = s
->framep
[VP56_FRAME_CURRENT
];
1619 // release no longer referenced frames
1620 for (i
= 0; i
< 4; i
++)
1621 if (s
->frames
[i
].data
[0] &&
1622 &s
->frames
[i
] != s
->framep
[VP56_FRAME_CURRENT
] &&
1623 &s
->frames
[i
] != s
->framep
[VP56_FRAME_PREVIOUS
] &&
1624 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN
] &&
1625 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN2
])
1626 avctx
->release_buffer(avctx
, &s
->frames
[i
]);
1628 if (!s
->invisible
) {
1629 *(AVFrame
*)data
= *s
->framep
[VP56_FRAME_CURRENT
];
1630 *data_size
= sizeof(AVFrame
);
1636 static av_cold
int vp8_decode_init(AVCodecContext
*avctx
)
1638 VP8Context
*s
= avctx
->priv_data
;
1641 avctx
->pix_fmt
= PIX_FMT_YUV420P
;
1643 dsputil_init(&s
->dsp
, avctx
);
1644 ff_h264_pred_init(&s
->hpc
, CODEC_ID_VP8
);
1645 ff_vp8dsp_init(&s
->vp8dsp
);
1647 // intra pred needs edge emulation among other things
1648 if (avctx
->flags
&CODEC_FLAG_EMU_EDGE
) {
1649 av_log(avctx
, AV_LOG_ERROR
, "Edge emulation not supported\n");
1650 return AVERROR_PATCHWELCOME
;
1656 static av_cold
int vp8_decode_free(AVCodecContext
*avctx
)
1658 vp8_decode_flush(avctx
);
1662 AVCodec vp8_decoder
= {
1672 .flush
= vp8_decode_flush
,
1673 .long_name
= NULL_IF_CONFIG_SMALL("On2 VP8"),