Add HEVC decoder
[libav.git] / libavcodec / hevc.c
CommitLineData
064698d3
GM
1/*
2 * HEVC video decoder
3 *
4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
8 *
9 * This file is part of Libav.
10 *
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#include "libavutil/attributes.h"
27#include "libavutil/common.h"
28#include "libavutil/internal.h"
29#include "libavutil/md5.h"
30#include "libavutil/opt.h"
31#include "libavutil/pixdesc.h"
32
33#include "bytestream.h"
34#include "cabac_functions.h"
35#include "dsputil.h"
36#include "golomb.h"
37#include "hevc.h"
38
/*
 * Three parallel 4-entry lookup tables. For every index i,
 * ff_hevc_qpel_extra[i] == ff_hevc_qpel_extra_before[i] +
 *                          ff_hevc_qpel_extra_after[i].
 * NOTE(review): the index is presumably the quarter-sample fractional
 * position and the values the extra samples needed on each side of a block
 * for interpolation -- confirm against the users of these tables.
 */
39const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
40const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 3, 4, 4 };
41const uint8_t ff_hevc_qpel_extra[4] = { 0, 6, 7, 6 };
42
/* Single-entry scan; hls_residual_coding() uses it as the coefficient-group
 * scan (both x and y) for a diagonally scanned 4x4 transform block. */
43static const uint8_t scan_1x1[1] = { 0 };
44
/* x/y coordinates of the n-th entry of a row-by-row scan of a 2x2 grid.
 * hls_residual_coding() uses them as the coefficient-group scan for
 * SCAN_HORIZ and, with x and y exchanged, for the vertical scan. */
45static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
46
47static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
48
/* x/y coordinates of the n-th entry of a row-by-row scan of a 4x4 grid;
 * used as the per-coefficient offsets inside one coefficient group
 * (exchanged for the vertical scan). */
49static const uint8_t horiz_scan4x4_x[16] = {
50 0, 1, 2, 3,
51 0, 1, 2, 3,
52 0, 1, 2, 3,
53 0, 1, 2, 3,
54};
55
56static const uint8_t horiz_scan4x4_y[16] = {
57 0, 0, 0, 0,
58 1, 1, 1, 1,
59 2, 2, 2, 2,
60 3, 3, 3, 3,
61};
62
/* Inverse of the horizontal scan of an 8x8 block: entry [y][x] is the scan
 * position of the coefficient at (x, y). Positions are grouped by 4x4
 * coefficient group (16 positions each). hls_residual_coding() indexes it
 * with the last significant coefficient position to obtain num_coeff. */
63static const uint8_t horiz_scan8x8_inv[8][8] = {
64 { 0, 1, 2, 3, 16, 17, 18, 19, },
65 { 4, 5, 6, 7, 20, 21, 22, 23, },
66 { 8, 9, 10, 11, 24, 25, 26, 27, },
67 { 12, 13, 14, 15, 28, 29, 30, 31, },
68 { 32, 33, 34, 35, 48, 49, 50, 51, },
69 { 36, 37, 38, 39, 52, 53, 54, 55, },
70 { 40, 41, 42, 43, 56, 57, 58, 59, },
71 { 44, 45, 46, 47, 60, 61, 62, 63, },
72};
73
/* x/y coordinates of the n-th entry of the diagonal scan of a 2x2 grid;
 * hls_residual_coding() uses them as the coefficient-group scan of an 8x8
 * transform block. */
74static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
75
76static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
77
/* Inverse of the 2x2 diagonal scan: entry [y][x] is the scan position of
 * grid cell (x, y). */
78static const uint8_t diag_scan2x2_inv[2][2] = {
79 { 0, 2, },
80 { 1, 3, },
81};
82
/* x/y coordinates of the n-th entry of the diagonal scan of a 4x4 grid.
 * hls_residual_coding() uses them both as the per-coefficient offsets inside
 * a coefficient group and as the coefficient-group scan of a 16x16 block. */
83const uint8_t ff_hevc_diag_scan4x4_x[16] = {
84 0, 0, 1, 0,
85 1, 2, 0, 1,
86 2, 3, 1, 2,
87 3, 2, 3, 3,
88};
89
90const uint8_t ff_hevc_diag_scan4x4_y[16] = {
91 0, 1, 0, 2,
92 1, 0, 3, 2,
93 1, 0, 3, 2,
94 1, 3, 2, 3,
95};
96
/* Inverse of the 4x4 diagonal scan: entry [y][x] is the scan position of
 * grid cell (x, y). */
97static const uint8_t diag_scan4x4_inv[4][4] = {
98 { 0, 2, 5, 9, },
99 { 1, 4, 8, 12, },
100 { 3, 7, 11, 14, },
101 { 6, 10, 13, 15, },
102};
103
/* x/y coordinates of the n-th entry of the diagonal scan of an 8x8 grid;
 * hls_residual_coding() uses them as the coefficient-group scan of a 32x32
 * transform block. */
104const uint8_t ff_hevc_diag_scan8x8_x[64] = {
105 0, 0, 1, 0,
106 1, 2, 0, 1,
107 2, 3, 0, 1,
108 2, 3, 4, 0,
109 1, 2, 3, 4,
110 5, 0, 1, 2,
111 3, 4, 5, 6,
112 0, 1, 2, 3,
113 4, 5, 6, 7,
114 1, 2, 3, 4,
115 5, 6, 7, 2,
116 3, 4, 5, 6,
117 7, 3, 4, 5,
118 6, 7, 4, 5,
119 6, 7, 5, 6,
120 7, 6, 7, 7,
121};
122
123const uint8_t ff_hevc_diag_scan8x8_y[64] = {
124 0, 1, 0, 2,
125 1, 0, 3, 2,
126 1, 0, 4, 3,
127 2, 1, 0, 5,
128 4, 3, 2, 1,
129 0, 6, 5, 4,
130 3, 2, 1, 0,
131 7, 6, 5, 4,
132 3, 2, 1, 0,
133 7, 6, 5, 4,
134 3, 2, 1, 7,
135 6, 5, 4, 3,
136 2, 7, 6, 5,
137 4, 3, 7, 6,
138 5, 4, 7, 6,
139 5, 7, 6, 7,
140};
141
/* Inverse of the 8x8 diagonal scan: entry [y][x] is the scan position of
 * grid cell (x, y). */
142static const uint8_t diag_scan8x8_inv[8][8] = {
143 { 0, 2, 5, 9, 14, 20, 27, 35, },
144 { 1, 4, 8, 13, 19, 26, 34, 42, },
145 { 3, 7, 12, 18, 25, 33, 41, 48, },
146 { 6, 11, 17, 24, 32, 40, 47, 53, },
147 { 10, 16, 23, 31, 39, 46, 52, 57, },
148 { 15, 22, 30, 38, 45, 51, 56, 60, },
149 { 21, 29, 37, 44, 50, 55, 59, 62, },
150 { 28, 36, 43, 49, 54, 58, 61, 63, },
151};
152
153/**
154 * NOTE: Each function hls_foo corresponds to the function foo in the
155 * specification (HLS stands for High Level Syntax).
156 */
157
158/**
159 * Section 5.7
160 */
161
162/* free everything allocated by pic_arrays_init() */
163static void pic_arrays_free(HEVCContext *s)
164{
165 av_freep(&s->sao);
166 av_freep(&s->deblock);
167 av_freep(&s->split_cu_flag);
168
169 av_freep(&s->skip_flag);
170 av_freep(&s->tab_ct_depth);
171
172 av_freep(&s->tab_ipm);
173 av_freep(&s->cbf_luma);
174 av_freep(&s->is_pcm);
175
176 av_freep(&s->qp_y_tab);
177 av_freep(&s->tab_slice_address);
178 av_freep(&s->filter_slice_edges);
179
180 av_freep(&s->horizontal_bs);
181 av_freep(&s->vertical_bs);
182
183 av_buffer_pool_uninit(&s->tab_mvf_pool);
184 av_buffer_pool_uninit(&s->rpl_tab_pool);
185}
186
187/* allocate arrays that depend on frame dimensions */
188static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
189{
190 int log2_min_cb_size = sps->log2_min_cb_size;
191 int width = sps->width;
192 int height = sps->height;
193 int pic_size = width * height;
194 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
195 ((height >> log2_min_cb_size) + 1);
196 int ctb_count = sps->ctb_width * sps->ctb_height;
197 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
198
199 s->bs_width = width >> 3;
200 s->bs_height = height >> 3;
201
202 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
203 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204 s->split_cu_flag = av_malloc(pic_size);
205 if (!s->sao || !s->deblock || !s->split_cu_flag)
206 goto fail;
207
208 s->skip_flag = av_malloc(pic_size_in_ctb);
209 s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
210 if (!s->skip_flag || !s->tab_ct_depth)
211 goto fail;
212
213 s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
214 s->tab_ipm = av_malloc(min_pu_size);
215 s->is_pcm = av_malloc(min_pu_size);
216 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
217 goto fail;
218
219 s->filter_slice_edges = av_malloc(ctb_count);
220 s->tab_slice_address = av_malloc(pic_size_in_ctb *
221 sizeof(*s->tab_slice_address));
222 s->qp_y_tab = av_malloc(pic_size_in_ctb *
223 sizeof(*s->qp_y_tab));
224 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
225 goto fail;
226
227 s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228 s->vertical_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
229 if (!s->horizontal_bs || !s->vertical_bs)
230 goto fail;
231
232 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
233 av_buffer_alloc);
234 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
235 av_buffer_allocz);
236 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
237 goto fail;
238
239 return 0;
240
241fail:
242 pic_arrays_free(s);
243 return AVERROR(ENOMEM);
244}
245
246static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
247{
248 int i = 0;
249 int j = 0;
250 uint8_t luma_weight_l0_flag[16];
251 uint8_t chroma_weight_l0_flag[16];
252 uint8_t luma_weight_l1_flag[16];
253 uint8_t chroma_weight_l1_flag[16];
254
255 s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
256 if (s->sps->chroma_format_idc != 0) {
257 int delta = get_se_golomb(gb);
258 s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
259 }
260
261 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
262 luma_weight_l0_flag[i] = get_bits1(gb);
263 if (!luma_weight_l0_flag[i]) {
264 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
265 s->sh.luma_offset_l0[i] = 0;
266 }
267 }
268 if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
269 for (i = 0; i < s->sh.nb_refs[L0]; i++)
270 chroma_weight_l0_flag[i] = get_bits1(gb);
271 } else {
272 for (i = 0; i < s->sh.nb_refs[L0]; i++)
273 chroma_weight_l0_flag[i] = 0;
274 }
275 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
276 if (luma_weight_l0_flag[i]) {
277 int delta_luma_weight_l0 = get_se_golomb(gb);
278 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
279 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
280 }
281 if (chroma_weight_l0_flag[i]) {
282 for (j = 0; j < 2; j++) {
283 int delta_chroma_weight_l0 = get_se_golomb(gb);
284 int delta_chroma_offset_l0 = get_se_golomb(gb);
285 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
286 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
287 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
288 }
289 } else {
290 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
291 s->sh.chroma_offset_l0[i][0] = 0;
292 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
293 s->sh.chroma_offset_l0[i][1] = 0;
294 }
295 }
296 if (s->sh.slice_type == B_SLICE) {
297 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
298 luma_weight_l1_flag[i] = get_bits1(gb);
299 if (!luma_weight_l1_flag[i]) {
300 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
301 s->sh.luma_offset_l1[i] = 0;
302 }
303 }
304 if (s->sps->chroma_format_idc != 0) {
305 for (i = 0; i < s->sh.nb_refs[L1]; i++)
306 chroma_weight_l1_flag[i] = get_bits1(gb);
307 } else {
308 for (i = 0; i < s->sh.nb_refs[L1]; i++)
309 chroma_weight_l1_flag[i] = 0;
310 }
311 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
312 if (luma_weight_l1_flag[i]) {
313 int delta_luma_weight_l1 = get_se_golomb(gb);
314 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
315 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
316 }
317 if (chroma_weight_l1_flag[i]) {
318 for (j = 0; j < 2; j++) {
319 int delta_chroma_weight_l1 = get_se_golomb(gb);
320 int delta_chroma_offset_l1 = get_se_golomb(gb);
321 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
322 s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
323 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
324 }
325 } else {
326 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
327 s->sh.chroma_offset_l1[i][0] = 0;
328 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
329 s->sh.chroma_offset_l1[i][1] = 0;
330 }
331 }
332 }
333}
334
335static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
336{
337 const HEVCSPS *sps = s->sps;
338 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
339 int prev_delta_msb = 0;
340 int nb_sps = 0, nb_sh;
341 int i;
342
343 rps->nb_refs = 0;
344 if (!sps->long_term_ref_pics_present_flag)
345 return 0;
346
347 if (sps->num_long_term_ref_pics_sps > 0)
348 nb_sps = get_ue_golomb_long(gb);
349 nb_sh = get_ue_golomb_long(gb);
350
351 if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
352 return AVERROR_INVALIDDATA;
353
354 rps->nb_refs = nb_sh + nb_sps;
355
356 for (i = 0; i < rps->nb_refs; i++) {
357 uint8_t delta_poc_msb_present;
358
359 if (i < nb_sps) {
360 uint8_t lt_idx_sps = 0;
361
362 if (sps->num_long_term_ref_pics_sps > 1)
363 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
364
365 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
366 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
367 } else {
368 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
369 rps->used[i] = get_bits1(gb);
370 }
371
372 delta_poc_msb_present = get_bits1(gb);
373 if (delta_poc_msb_present) {
374 int delta = get_ue_golomb_long(gb);
375
376 if (i && i != nb_sps)
377 delta += prev_delta_msb;
378
379 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
380 prev_delta_msb = delta;
381 }
382 }
383
384 return 0;
385}
386
387static int set_sps(HEVCContext *s, const HEVCSPS *sps)
388{
389 int ret;
390
391 pic_arrays_free(s);
392 ret = pic_arrays_init(s, sps);
393 if (ret < 0)
394 goto fail;
395
396 s->avctx->coded_width = sps->width;
397 s->avctx->coded_height = sps->height;
398 s->avctx->width = sps->output_width;
399 s->avctx->height = sps->output_height;
400 s->avctx->pix_fmt = sps->pix_fmt;
401 s->avctx->sample_aspect_ratio = sps->vui.sar;
402 s->avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
403
404 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
405 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
406 ff_videodsp_init (&s->vdsp, sps->bit_depth);
407
408 if (sps->sao_enabled) {
409 av_frame_unref(s->tmp_frame);
410 ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
411 if (ret < 0)
412 goto fail;
413 s->frame = s->tmp_frame;
414 }
415
416 s->sps = sps;
417 s->vps = s->vps_list[s->sps->vps_id];
418 return 0;
419
420fail:
421 pic_arrays_free(s);
422 s->sps = NULL;
423 return ret;
424}
425
426static int hls_slice_header(HEVCContext *s)
427{
428 GetBitContext *gb = &s->HEVClc.gb;
429 SliceHeader *sh = &s->sh;
430 int i, ret;
431
432 // Coded parameters
433 sh->first_slice_in_pic_flag = get_bits1(gb);
434 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
435 s->seq_decode = (s->seq_decode + 1) & 0xff;
436 s->max_ra = INT_MAX;
437 if (IS_IDR(s))
438 ff_hevc_clear_refs(s);
439 }
440 if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
441 sh->no_output_of_prior_pics_flag = get_bits1(gb);
442
443 sh->pps_id = get_ue_golomb_long(gb);
444 if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
445 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
446 return AVERROR_INVALIDDATA;
447 }
448 if (!sh->first_slice_in_pic_flag &&
449 s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
450 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
451 return AVERROR_INVALIDDATA;
452 }
453 s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
454
455 if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
456 s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
457
458 ff_hevc_clear_refs(s);
459 ret = set_sps(s, s->sps);
460 if (ret < 0)
461 return ret;
462
463 s->seq_decode = (s->seq_decode + 1) & 0xff;
464 s->max_ra = INT_MAX;
465 }
466
467 sh->dependent_slice_segment_flag = 0;
468 if (!sh->first_slice_in_pic_flag) {
469 int slice_address_length;
470
471 if (s->pps->dependent_slice_segments_enabled_flag)
472 sh->dependent_slice_segment_flag = get_bits1(gb);
473
474 slice_address_length = av_ceil_log2(s->sps->ctb_width *
475 s->sps->ctb_height);
476 sh->slice_segment_addr = get_bits(gb, slice_address_length);
477 if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
478 av_log(s->avctx, AV_LOG_ERROR,
479 "Invalid slice segment address: %u.\n",
480 sh->slice_segment_addr);
481 return AVERROR_INVALIDDATA;
482 }
483
484 if (!sh->dependent_slice_segment_flag) {
485 sh->slice_addr = sh->slice_segment_addr;
486 s->slice_idx++;
487 }
488 } else {
489 sh->slice_segment_addr = sh->slice_addr = 0;
490 s->slice_idx = 0;
491 s->slice_initialized = 0;
492 }
493
494 if (!sh->dependent_slice_segment_flag) {
495 s->slice_initialized = 0;
496
497 for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
498 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
499
500 sh->slice_type = get_ue_golomb_long(gb);
501 if (!(sh->slice_type == I_SLICE ||
502 sh->slice_type == P_SLICE ||
503 sh->slice_type == B_SLICE)) {
504 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
505 sh->slice_type);
506 return AVERROR_INVALIDDATA;
507 }
508 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
509 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
510 return AVERROR_INVALIDDATA;
511 }
512
513 if (s->pps->output_flag_present_flag)
514 sh->pic_output_flag = get_bits1(gb);
515
516 if (s->sps->separate_colour_plane_flag)
517 sh->colour_plane_id = get_bits(gb, 2);
518
519 if (!IS_IDR(s)) {
520 int short_term_ref_pic_set_sps_flag, poc;
521
522 sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
523 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
524 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
525 av_log(s->avctx, AV_LOG_WARNING,
526 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
527 if (s->avctx->err_recognition & AV_EF_EXPLODE)
528 return AVERROR_INVALIDDATA;
529 poc = s->poc;
530 }
531 s->poc = poc;
532
533 short_term_ref_pic_set_sps_flag = get_bits1(gb);
534 if (!short_term_ref_pic_set_sps_flag) {
535 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
536 if (ret < 0)
537 return ret;
538
539 sh->short_term_rps = &sh->slice_rps;
540 } else {
541 int numbits, rps_idx;
542
543 if (!s->sps->nb_st_rps) {
544 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
545 return AVERROR_INVALIDDATA;
546 }
547
548 numbits = av_ceil_log2(s->sps->nb_st_rps);
549 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
550 sh->short_term_rps = &s->sps->st_rps[rps_idx];
551 }
552
553 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
554 if (ret < 0) {
555 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
556 if (s->avctx->err_recognition & AV_EF_EXPLODE)
557 return AVERROR_INVALIDDATA;
558 }
559
560 if (s->sps->sps_temporal_mvp_enabled_flag)
561 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
562 else
563 sh->slice_temporal_mvp_enabled_flag = 0;
564 } else {
565 s->sh.short_term_rps = NULL;
566 s->poc = 0;
567 }
568
569 /* 8.3.1 */
570 if (s->temporal_id == 0 &&
571 s->nal_unit_type != NAL_TRAIL_N &&
572 s->nal_unit_type != NAL_TSA_N &&
573 s->nal_unit_type != NAL_STSA_N &&
574 s->nal_unit_type != NAL_RADL_N &&
575 s->nal_unit_type != NAL_RADL_R &&
576 s->nal_unit_type != NAL_RASL_N &&
577 s->nal_unit_type != NAL_RASL_R)
578 s->pocTid0 = s->poc;
579
580 if (s->sps->sao_enabled) {
581 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
582 sh->slice_sample_adaptive_offset_flag[1] =
583 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
584 } else {
585 sh->slice_sample_adaptive_offset_flag[0] = 0;
586 sh->slice_sample_adaptive_offset_flag[1] = 0;
587 sh->slice_sample_adaptive_offset_flag[2] = 0;
588 }
589
590 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
591 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
592 int nb_refs;
593
594 sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
595 if (sh->slice_type == B_SLICE)
596 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
597
598 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
599 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
600 if (sh->slice_type == B_SLICE)
601 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
602 }
603 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
604 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
605 sh->nb_refs[L0], sh->nb_refs[L1]);
606 return AVERROR_INVALIDDATA;
607 }
608
609 sh->rpl_modification_flag[0] = 0;
610 sh->rpl_modification_flag[1] = 0;
611 nb_refs = ff_hevc_frame_nb_refs(s);
612 if (!nb_refs) {
613 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
614 return AVERROR_INVALIDDATA;
615 }
616
617 if (s->pps->lists_modification_present_flag && nb_refs > 1) {
618 sh->rpl_modification_flag[0] = get_bits1(gb);
619 if (sh->rpl_modification_flag[0]) {
620 for (i = 0; i < sh->nb_refs[L0]; i++)
621 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
622 }
623
624 if (sh->slice_type == B_SLICE) {
625 sh->rpl_modification_flag[1] = get_bits1(gb);
626 if (sh->rpl_modification_flag[1] == 1)
627 for (i = 0; i < sh->nb_refs[L1]; i++)
628 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
629 }
630 }
631
632 if (sh->slice_type == B_SLICE)
633 sh->mvd_l1_zero_flag = get_bits1(gb);
634
635 if (s->pps->cabac_init_present_flag)
636 sh->cabac_init_flag = get_bits1(gb);
637 else
638 sh->cabac_init_flag = 0;
639
640 sh->collocated_ref_idx = 0;
641 if (sh->slice_temporal_mvp_enabled_flag) {
642 sh->collocated_list = L0;
643 if (sh->slice_type == B_SLICE)
644 sh->collocated_list = !get_bits1(gb);
645
646 if (sh->nb_refs[sh->collocated_list] > 1) {
647 sh->collocated_ref_idx = get_ue_golomb_long(gb);
648 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
649 av_log(s->avctx, AV_LOG_ERROR,
650 "Invalid collocated_ref_idx: %d.\n",
651 sh->collocated_ref_idx);
652 return AVERROR_INVALIDDATA;
653 }
654 }
655 }
656
657 if ((s->pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
658 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
659 pred_weight_table(s, gb);
660 }
661
662 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
663 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
664 av_log(s->avctx, AV_LOG_ERROR,
665 "Invalid number of merging MVP candidates: %d.\n",
666 sh->max_num_merge_cand);
667 return AVERROR_INVALIDDATA;
668 }
669 }
670
671 sh->slice_qp_delta = get_se_golomb(gb);
672 if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
673 sh->slice_cb_qp_offset = get_se_golomb(gb);
674 sh->slice_cr_qp_offset = get_se_golomb(gb);
675 } else {
676 sh->slice_cb_qp_offset = 0;
677 sh->slice_cr_qp_offset = 0;
678 }
679
680 if (s->pps->deblocking_filter_control_present_flag) {
681 int deblocking_filter_override_flag = 0;
682
683 if (s->pps->deblocking_filter_override_enabled_flag)
684 deblocking_filter_override_flag = get_bits1(gb);
685
686 if (deblocking_filter_override_flag) {
687 sh->disable_deblocking_filter_flag = get_bits1(gb);
688 if (!sh->disable_deblocking_filter_flag) {
689 sh->beta_offset = get_se_golomb(gb) * 2;
690 sh->tc_offset = get_se_golomb(gb) * 2;
691 }
692 } else {
693 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
694 sh->beta_offset = s->pps->beta_offset;
695 sh->tc_offset = s->pps->tc_offset;
696 }
697 } else {
698 sh->disable_deblocking_filter_flag = 0;
699 sh->beta_offset = 0;
700 sh->tc_offset = 0;
701 }
702
703 if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
704 (sh->slice_sample_adaptive_offset_flag[0] ||
705 sh->slice_sample_adaptive_offset_flag[1] ||
706 !sh->disable_deblocking_filter_flag)) {
707 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
708 } else {
709 sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
710 }
711 } else if (!s->slice_initialized) {
712 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
713 return AVERROR_INVALIDDATA;
714 }
715
716 sh->num_entry_point_offsets = 0;
717 if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
718 sh->num_entry_point_offsets = get_ue_golomb_long(gb);
719 if (sh->num_entry_point_offsets > 0) {
720 int offset_len = get_ue_golomb_long(gb) + 1;
721
722 for (i = 0; i < sh->num_entry_point_offsets; i++)
723 skip_bits(gb, offset_len);
724 }
725 }
726
727 if (s->pps->slice_header_extension_present_flag) {
728 int length = get_ue_golomb_long(gb);
729 for (i = 0; i < length; i++)
730 skip_bits(gb, 8); // slice_header_extension_data_byte
731 }
732
733 // Inferred parameters
734 sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
735 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
736
737 s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
738
739 if (!s->pps->cu_qp_delta_enabled_flag)
740 s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
741 (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
742
743 s->slice_initialized = 1;
744
745 return 0;
746}
747
748#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
749
750#define SET_SAO(elem, value) \
751do { \
752 if (!sao_merge_up_flag && !sao_merge_left_flag) \
753 sao->elem = value; \
754 else if (sao_merge_left_flag) \
755 sao->elem = CTB(s->sao, rx-1, ry).elem; \
756 else if (sao_merge_up_flag) \
757 sao->elem = CTB(s->sao, rx, ry-1).elem; \
758 else \
759 sao->elem = 0; \
760} while (0)
761
762static void hls_sao_param(HEVCContext *s, int rx, int ry)
763{
764 HEVCLocalContext *lc = &s->HEVClc;
765 int sao_merge_left_flag = 0;
766 int sao_merge_up_flag = 0;
767 int shift = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
768 SAOParams *sao = &CTB(s->sao, rx, ry);
769 int c_idx, i;
770
771 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
772 s->sh.slice_sample_adaptive_offset_flag[1]) {
773 if (rx > 0) {
774 if (lc->ctb_left_flag)
775 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
776 }
777 if (ry > 0 && !sao_merge_left_flag) {
778 if (lc->ctb_up_flag)
779 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
780 }
781 }
782
783 for (c_idx = 0; c_idx < 3; c_idx++) {
784 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
785 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
786 continue;
787 }
788
789 if (c_idx == 2) {
790 sao->type_idx[2] = sao->type_idx[1];
791 sao->eo_class[2] = sao->eo_class[1];
792 } else {
793 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
794 }
795
796 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
797 continue;
798
799 for (i = 0; i < 4; i++)
800 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
801
802 if (sao->type_idx[c_idx] == SAO_BAND) {
803 for (i = 0; i < 4; i++) {
804 if (sao->offset_abs[c_idx][i]) {
805 SET_SAO(offset_sign[c_idx][i],
806 ff_hevc_sao_offset_sign_decode(s));
807 } else {
808 sao->offset_sign[c_idx][i] = 0;
809 }
810 }
811 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
812 } else if (c_idx != 2) {
813 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
814 }
815
816 // Inferred parameters
817 sao->offset_val[c_idx][0] = 0;
818 for (i = 0; i < 4; i++) {
819 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
820 if (sao->type_idx[c_idx] == SAO_EDGE) {
821 if (i > 1)
822 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
823 } else if (sao->offset_sign[c_idx][i]) {
824 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
825 }
826 }
827 }
828}
829
830#undef SET_SAO
831#undef CTB
832
833static void hls_residual_coding(HEVCContext *s, int x0, int y0,
834 int log2_trafo_size, enum ScanType scan_idx,
835 int c_idx)
836{
837#define GET_COORD(offset, n) \
838 do { \
839 x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n]; \
840 y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n]; \
841 } while (0)
842 HEVCLocalContext *lc = &s->HEVClc;
843 int transform_skip_flag = 0;
844
845 int last_significant_coeff_x, last_significant_coeff_y;
846 int last_scan_pos;
847 int n_end;
848 int num_coeff = 0;
849 int greater1_ctx = 1;
850
851 int num_last_subset;
852 int x_cg_last_sig, y_cg_last_sig;
853
854 const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
855
856 ptrdiff_t stride = s->frame->linesize[c_idx];
857 int hshift = s->sps->hshift[c_idx];
858 int vshift = s->sps->vshift[c_idx];
859 uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride +
860 ((x0 >> hshift) << s->sps->pixel_shift)];
861 DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
862 DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
863
864 int trafo_size = 1 << log2_trafo_size;
865 int i, qp, shift, add, scale, scale_m;
866 const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
867 const uint8_t *scale_matrix;
868 uint8_t dc_scale;
869
870 // Derive QP for dequant
871 if (!lc->cu.cu_transquant_bypass_flag) {
872 static const int qp_c[] = {
873 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
874 };
875
876 static const uint8_t rem6[51 + 2 * 6 + 1] = {
877 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
878 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
879 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
880 };
881
882 static const uint8_t div6[51 + 2 * 6 + 1] = {
883 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
884 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
885 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
886 };
887 int qp_y = lc->qp_y;
888
889 if (c_idx == 0) {
890 qp = qp_y + s->sps->qp_bd_offset;
891 } else {
892 int qp_i, offset;
893
894 if (c_idx == 1)
895 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
896 else
897 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
898
899 qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
900 if (qp_i < 30)
901 qp = qp_i;
902 else if (qp_i > 43)
903 qp = qp_i - 6;
904 else
905 qp = qp_c[qp_i - 30];
906
907 qp += s->sps->qp_bd_offset;
908 }
909
910 shift = s->sps->bit_depth + log2_trafo_size - 5;
911 add = 1 << (shift - 1);
912 scale = level_scale[rem6[qp]] << (div6[qp]);
913 scale_m = 16; // default when no custom scaling lists.
914 dc_scale = 16;
915
916 if (s->sps->scaling_list_enable_flag) {
917 const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
918 &s->pps->scaling_list : &s->sps->scaling_list;
919 int matrix_id = lc->cu.pred_mode != MODE_INTRA;
920
921 if (log2_trafo_size != 5)
922 matrix_id = 3 * matrix_id + c_idx;
923
924 scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
925 if (log2_trafo_size >= 4)
926 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
927 }
928 }
929
930 if (s->pps->transform_skip_enabled_flag &&
931 !lc->cu.cu_transquant_bypass_flag &&
932 log2_trafo_size == 2) {
933 transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
934 }
935
936 last_significant_coeff_x =
937 ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
938 last_significant_coeff_y =
939 ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
940
941 if (last_significant_coeff_x > 3) {
942 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
943 last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
944 (2 + (last_significant_coeff_x & 1)) +
945 suffix;
946 }
947
948 if (last_significant_coeff_y > 3) {
949 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
950 last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
951 (2 + (last_significant_coeff_y & 1)) +
952 suffix;
953 }
954
955 if (scan_idx == SCAN_VERT)
956 FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
957
958 x_cg_last_sig = last_significant_coeff_x >> 2;
959 y_cg_last_sig = last_significant_coeff_y >> 2;
960
961 switch (scan_idx) {
962 case SCAN_DIAG: {
963 int last_x_c = last_significant_coeff_x & 3;
964 int last_y_c = last_significant_coeff_y & 3;
965
966 scan_x_off = ff_hevc_diag_scan4x4_x;
967 scan_y_off = ff_hevc_diag_scan4x4_y;
968 num_coeff = diag_scan4x4_inv[last_y_c][last_x_c];
969 if (trafo_size == 4) {
970 scan_x_cg = scan_1x1;
971 scan_y_cg = scan_1x1;
972 } else if (trafo_size == 8) {
973 num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
974 scan_x_cg = diag_scan2x2_x;
975 scan_y_cg = diag_scan2x2_y;
976 } else if (trafo_size == 16) {
977 num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
978 scan_x_cg = ff_hevc_diag_scan4x4_x;
979 scan_y_cg = ff_hevc_diag_scan4x4_y;
980 } else { // trafo_size == 32
981 num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
982 scan_x_cg = ff_hevc_diag_scan8x8_x;
983 scan_y_cg = ff_hevc_diag_scan8x8_y;
984 }
985 break;
986 }
987 case SCAN_HORIZ:
988 scan_x_cg = horiz_scan2x2_x;
989 scan_y_cg = horiz_scan2x2_y;
990 scan_x_off = horiz_scan4x4_x;
991 scan_y_off = horiz_scan4x4_y;
992 num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
993 break;
994 default: //SCAN_VERT
995 scan_x_cg = horiz_scan2x2_y;
996 scan_y_cg = horiz_scan2x2_x;
997 scan_x_off = horiz_scan4x4_y;
998 scan_y_off = horiz_scan4x4_x;
999 num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1000 break;
1001 }
1002 num_coeff++;
1003 num_last_subset = (num_coeff - 1) >> 4;
1004
1005 for (i = num_last_subset; i >= 0; i--) {
1006 int n, m;
1007 int x_cg, y_cg, x_c, y_c;
1008 int implicit_non_zero_coeff = 0;
1009 int64_t trans_coeff_level;
1010 int prev_sig = 0;
1011 int offset = i << 4;
1012
1013 uint8_t significant_coeff_flag_idx[16];
1014 uint8_t nb_significant_coeff_flag = 0;
1015
1016 x_cg = scan_x_cg[i];
1017 y_cg = scan_y_cg[i];
1018
1019 if (i < num_last_subset && i > 0) {
1020 int ctx_cg = 0;
1021 if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1022 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1023 if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1024 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1025
1026 significant_coeff_group_flag[x_cg][y_cg] =
1027 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1028 implicit_non_zero_coeff = 1;
1029 } else {
1030 significant_coeff_group_flag[x_cg][y_cg] =
1031 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1032 (x_cg == 0 && y_cg == 0));
1033 }
1034
1035 last_scan_pos = num_coeff - offset - 1;
1036
1037 if (i == num_last_subset) {
1038 n_end = last_scan_pos - 1;
1039 significant_coeff_flag_idx[0] = last_scan_pos;
1040 nb_significant_coeff_flag = 1;
1041 } else {
1042 n_end = 15;
1043 }
1044
1045 if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1046 prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1047 if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1048 prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1049
1050 for (n = n_end; n >= 0; n--) {
1051 GET_COORD(offset, n);
1052
1053 if (significant_coeff_group_flag[x_cg][y_cg] &&
1054 (n > 0 || implicit_non_zero_coeff == 0)) {
1055 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1056 log2_trafo_size,
1057 scan_idx,
1058 prev_sig) == 1) {
1059 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1060 nb_significant_coeff_flag++;
1061 implicit_non_zero_coeff = 0;
1062 }
1063 } else {
1064 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1065 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1066 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1067 nb_significant_coeff_flag++;
1068 }
1069 }
1070 }
1071
1072 n_end = nb_significant_coeff_flag;
1073
1074 if (n_end) {
1075 int first_nz_pos_in_cg = 16;
1076 int last_nz_pos_in_cg = -1;
1077 int c_rice_param = 0;
1078 int first_greater1_coeff_idx = -1;
1079 uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1080 uint16_t coeff_sign_flag;
1081 int sum_abs = 0;
1082 int sign_hidden = 0;
1083
1084 // initialize first elem of coeff_abs_level_greater1_flag
1085 int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1086
1087 if (!(i == num_last_subset) && greater1_ctx == 0)
1088 ctx_set++;
1089 greater1_ctx = 1;
1090 last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1091
1092 for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1093 int n_idx = significant_coeff_flag_idx[m];
1094 int inc = (ctx_set << 2) + greater1_ctx;
1095 coeff_abs_level_greater1_flag[n_idx] =
1096 ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1097 if (coeff_abs_level_greater1_flag[n_idx]) {
1098 greater1_ctx = 0;
1099 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1100 greater1_ctx++;
1101 }
1102
1103 if (coeff_abs_level_greater1_flag[n_idx] &&
1104 first_greater1_coeff_idx == -1)
1105 first_greater1_coeff_idx = n_idx;
1106 }
1107 first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1108 sign_hidden = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1109 !lc->cu.cu_transquant_bypass_flag;
1110
1111 if (first_greater1_coeff_idx != -1) {
1112 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1113 }
1114 if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1115 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1116 } else {
1117 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1118 }
1119
1120 for (m = 0; m < n_end; m++) {
1121 n = significant_coeff_flag_idx[m];
1122 GET_COORD(offset, n);
1123 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1124 if (trans_coeff_level == ((m < 8) ?
1125 ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1126 int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1127
1128 trans_coeff_level += last_coeff_abs_level_remaining;
1129 if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1130 c_rice_param = FFMIN(c_rice_param + 1, 4);
1131 }
1132 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1133 sum_abs += trans_coeff_level;
1134 if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1135 trans_coeff_level = -trans_coeff_level;
1136 }
1137 if (coeff_sign_flag >> 15)
1138 trans_coeff_level = -trans_coeff_level;
1139 coeff_sign_flag <<= 1;
1140 if (!lc->cu.cu_transquant_bypass_flag) {
1141 if (s->sps->scaling_list_enable_flag) {
1142 if (y_c || x_c || log2_trafo_size < 4) {
1143 int pos;
1144 switch (log2_trafo_size) {
1145 case 3: pos = (y_c << 3) + x_c; break;
1146 case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1147 case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1148 default: pos = (y_c << 2) + x_c;
1149 }
1150 scale_m = scale_matrix[pos];
1151 } else {
1152 scale_m = dc_scale;
1153 }
1154 }
1155 trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1156 if(trans_coeff_level < 0) {
1157 if((~trans_coeff_level) & 0xFffffffffff8000)
1158 trans_coeff_level = -32768;
1159 } else {
1160 if (trans_coeff_level & 0xffffffffffff8000)
1161 trans_coeff_level = 32767;
1162 }
1163 }
1164 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1165 }
1166 }
1167 }
1168
1169 if (lc->cu.cu_transquant_bypass_flag) {
1170 s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1171 } else {
1172 if (transform_skip_flag)
1173 s->hevcdsp.transform_skip(dst, coeffs, stride);
1174 else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1175 log2_trafo_size == 2)
1176 s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1177 else
1178 s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1179 }
1180}
1181
1182static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1183 int xBase, int yBase, int cb_xBase, int cb_yBase,
1184 int log2_cb_size, int log2_trafo_size,
1185 int trafo_depth, int blk_idx)
1186{
1187 HEVCLocalContext *lc = &s->HEVClc;
1188
1189 if (lc->cu.pred_mode == MODE_INTRA) {
1190 int trafo_size = 1 << log2_trafo_size;
1191 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1192
1193 s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1194 if (log2_trafo_size > 2) {
1195 trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1196 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1197 s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1198 s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1199 } else if (blk_idx == 3) {
1200 trafo_size = trafo_size << s->sps->hshift[1];
1201 ff_hevc_set_neighbour_available(s, xBase, yBase,
1202 trafo_size, trafo_size);
1203 s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1204 s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1205 }
1206 }
1207
1208 if (lc->tt.cbf_luma ||
1209 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1210 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1211 int scan_idx = SCAN_DIAG;
1212 int scan_idx_c = SCAN_DIAG;
1213
1214 if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1215 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1216 if (lc->tu.cu_qp_delta != 0)
1217 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1218 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1219 lc->tu.is_cu_qp_delta_coded = 1;
1220 ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1221 }
1222
1223 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1224 if (lc->tu.cur_intra_pred_mode >= 6 &&
1225 lc->tu.cur_intra_pred_mode <= 14) {
1226 scan_idx = SCAN_VERT;
1227 } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1228 lc->tu.cur_intra_pred_mode <= 30) {
1229 scan_idx = SCAN_HORIZ;
1230 }
1231
1232 if (lc->pu.intra_pred_mode_c >= 6 &&
1233 lc->pu.intra_pred_mode_c <= 14) {
1234 scan_idx_c = SCAN_VERT;
1235 } else if (lc->pu.intra_pred_mode_c >= 22 &&
1236 lc->pu.intra_pred_mode_c <= 30) {
1237 scan_idx_c = SCAN_HORIZ;
1238 }
1239 }
1240
1241 if (lc->tt.cbf_luma)
1242 hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1243 if (log2_trafo_size > 2) {
1244 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1245 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1246 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1247 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1248 } else if (blk_idx == 3) {
1249 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1250 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1251 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1252 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1253 }
1254 }
1255}
1256
1257static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1258{
1259 int cb_size = 1 << log2_cb_size;
1260 int log2_min_pu_size = s->sps->log2_min_pu_size;
1261
1262 int min_pu_width = s->sps->min_pu_width;
1263 int x_end = FFMIN(x0 + cb_size, s->sps->width);
1264 int y_end = FFMIN(y0 + cb_size, s->sps->height);
1265 int i, j;
1266
1267 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1268 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1269 s->is_pcm[i + j * min_pu_width] = 2;
1270}
1271
1272static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1273 int xBase, int yBase, int cb_xBase, int cb_yBase,
1274 int log2_cb_size, int log2_trafo_size,
1275 int trafo_depth, int blk_idx)
1276{
1277 HEVCLocalContext *lc = &s->HEVClc;
1278 uint8_t split_transform_flag;
1279
1280 if (trafo_depth > 0 && log2_trafo_size == 2) {
1281 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1282 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1283 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1284 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1285 } else {
1286 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1287 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1288 }
1289
1290 if (lc->cu.intra_split_flag) {
1291 if (trafo_depth == 1)
1292 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1293 } else {
1294 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1295 }
1296
1297 lc->tt.cbf_luma = 1;
1298
1299 lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1300 lc->cu.pred_mode == MODE_INTER &&
1301 lc->cu.part_mode != PART_2Nx2N &&
1302 trafo_depth == 0;
1303
1304 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1305 log2_trafo_size > s->sps->log2_min_tb_size &&
1306 trafo_depth < lc->cu.max_trafo_depth &&
1307 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1308 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1309 } else {
1310 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1311 (lc->cu.intra_split_flag && trafo_depth == 0) ||
1312 lc->tt.inter_split_flag;
1313 }
1314
1315 if (log2_trafo_size > 2) {
1316 if (trafo_depth == 0 ||
1317 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1318 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1319 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1320 }
1321
1322 if (trafo_depth == 0 ||
1323 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1324 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1325 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1326 }
1327 }
1328
1329 if (split_transform_flag) {
1330 int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1331 int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1332
1333 hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1334 log2_trafo_size - 1, trafo_depth + 1, 0);
1335 hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1336 log2_trafo_size - 1, trafo_depth + 1, 1);
1337 hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1338 log2_trafo_size - 1, trafo_depth + 1, 2);
1339 hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1340 log2_trafo_size - 1, trafo_depth + 1, 3);
1341 } else {
1342 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1343 int log2_min_tu_size = s->sps->log2_min_tb_size;
1344 int min_tu_width = s->sps->min_tb_width;
1345
1346 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1347 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1348 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1349 lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1350 }
1351
1352 hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1353 log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1354
1355 // TODO: store cbf_luma somewhere else
1356 if (lc->tt.cbf_luma) {
1357 int i, j;
1358 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1359 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1360 int x_tu = (x0 + j) >> log2_min_tu_size;
1361 int y_tu = (y0 + i) >> log2_min_tu_size;
1362 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1363 }
1364 }
1365 if (!s->sh.disable_deblocking_filter_flag) {
1366 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1367 lc->slice_or_tiles_up_boundary,
1368 lc->slice_or_tiles_left_boundary);
1369 if (s->pps->transquant_bypass_enable_flag &&
1370 lc->cu.cu_transquant_bypass_flag)
1371 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1372 }
1373 }
1374}
1375
1376static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1377{
1378 //TODO: non-4:2:0 support
1379 HEVCLocalContext *lc = &s->HEVClc;
1380 GetBitContext gb;
1381 int cb_size = 1 << log2_cb_size;
1382 int stride0 = s->frame->linesize[0];
1383 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1384 int stride1 = s->frame->linesize[1];
1385 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1386 int stride2 = s->frame->linesize[2];
1387 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1388
1389 int length = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth;
1390 const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1391 int ret;
1392
1393 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1394 lc->slice_or_tiles_up_boundary,
1395 lc->slice_or_tiles_left_boundary);
1396
1397 ret = init_get_bits(&gb, pcm, length);
1398 if (ret < 0)
1399 return ret;
1400
1401 s->hevcdsp.put_pcm(dst0, stride0, cb_size, &gb, s->sps->pcm.bit_depth);
1402 s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1403 s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1404 return 0;
1405}
1406
1407static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1408{
1409 HEVCLocalContext *lc = &s->HEVClc;
1410 int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1411 int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1412
1413 if (x)
1414 x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1415 if (y)
1416 y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1417
1418 switch (x) {
1419 case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s); break;
1420 case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1421 case 0: lc->pu.mvd.x = 0; break;
1422 }
1423
1424 switch (y) {
1425 case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s); break;
1426 case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1427 case 0: lc->pu.mvd.y = 0; break;
1428 }
1429}
1430
1431/**
1432 * 8.5.3.2.2.1 Luma sample interpolation process
1433 *
1434 * @param s HEVC decoding context
1435 * @param dst target buffer for block data at block position
1436 * @param dststride stride of the dst buffer
1437 * @param ref reference picture buffer at origin (0, 0)
1438 * @param mv motion vector (relative to block position) to get pixel data from
1439 * @param x_off horizontal position of block from origin (0, 0)
1440 * @param y_off vertical position of block from origin (0, 0)
1441 * @param block_w width of block
1442 * @param block_h height of block
1443 */
1444static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1445 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1446 int block_w, int block_h)
1447{
1448 HEVCLocalContext *lc = &s->HEVClc;
1449 uint8_t *src = ref->data[0];
1450 ptrdiff_t srcstride = ref->linesize[0];
1451 int pic_width = s->sps->width;
1452 int pic_height = s->sps->height;
1453
1454 int mx = mv->x & 3;
1455 int my = mv->y & 3;
1456 int extra_left = ff_hevc_qpel_extra_before[mx];
1457 int extra_top = ff_hevc_qpel_extra_before[my];
1458
1459 x_off += mv->x >> 2;
1460 y_off += mv->y >> 2;
1461 src += y_off * srcstride + (x_off << s->sps->pixel_shift);
1462
1463 if (x_off < extra_left || y_off < extra_top ||
1464 x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1465 y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1466 int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1467
1468 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset, srcstride,
1469 block_w + ff_hevc_qpel_extra[mx],
1470 block_h + ff_hevc_qpel_extra[my],
1471 x_off - extra_left, y_off - extra_top,
1472 pic_width, pic_height);
1473 src = lc->edge_emu_buffer + offset;
1474 }
1475 s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1476 block_h, lc->mc_buffer);
1477}
1478
1479/**
1480 * 8.5.3.2.2.2 Chroma sample interpolation process
1481 *
1482 * @param s HEVC decoding context
1483 * @param dst1 target buffer for block data at block position (U plane)
1484 * @param dst2 target buffer for block data at block position (V plane)
1485 * @param dststride stride of the dst1 and dst2 buffers
1486 * @param ref reference picture buffer at origin (0, 0)
1487 * @param mv motion vector (relative to block position) to get pixel data from
1488 * @param x_off horizontal position of block from origin (0, 0)
1489 * @param y_off vertical position of block from origin (0, 0)
1490 * @param block_w width of block
1491 * @param block_h height of block
1492 */
1493static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1494 ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1495 int x_off, int y_off, int block_w, int block_h)
1496{
1497 HEVCLocalContext *lc = &s->HEVClc;
1498 uint8_t *src1 = ref->data[1];
1499 uint8_t *src2 = ref->data[2];
1500 ptrdiff_t src1stride = ref->linesize[1];
1501 ptrdiff_t src2stride = ref->linesize[2];
1502 int pic_width = s->sps->width >> 1;
1503 int pic_height = s->sps->height >> 1;
1504
1505 int mx = mv->x & 7;
1506 int my = mv->y & 7;
1507
1508 x_off += mv->x >> 3;
1509 y_off += mv->y >> 3;
1510 src1 += y_off * src1stride + (x_off << s->sps->pixel_shift);
1511 src2 += y_off * src2stride + (x_off << s->sps->pixel_shift);
1512
1513 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1514 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1515 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1516 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1517 int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1518
1519 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1, src1stride,
1520 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1521 x_off - EPEL_EXTRA_BEFORE,
1522 y_off - EPEL_EXTRA_BEFORE,
1523 pic_width, pic_height);
1524
1525 src1 = lc->edge_emu_buffer + offset1;
1526 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1527 block_w, block_h, mx, my, lc->mc_buffer);
1528
1529 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2, src2stride,
1530 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1531 x_off - EPEL_EXTRA_BEFORE,
1532 y_off - EPEL_EXTRA_BEFORE,
1533 pic_width, pic_height);
1534 src2 = lc->edge_emu_buffer + offset2;
1535 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1536 block_w, block_h, mx, my,
1537 lc->mc_buffer);
1538 } else {
1539 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1540 block_w, block_h, mx, my,
1541 lc->mc_buffer);
1542 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1543 block_w, block_h, mx, my,
1544 lc->mc_buffer);
1545 }
1546}
1547
1548static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1549 const Mv *mv, int y0, int height)
1550{
1551 int y = (mv->y >> 2) + y0 + height + 9;
1552 ff_thread_await_progress(&ref->tf, y, 0);
1553}
1554
1555static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1556 int nPbW, int nPbH,
1557 int log2_cb_size, int partIdx)
1558{
1559#define POS(c_idx, x, y) \
1560 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1561 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1562 HEVCLocalContext *lc = &s->HEVClc;
1563 int merge_idx = 0;
1564 struct MvField current_mv = {{{ 0 }}};
1565
1566 int min_pu_width = s->sps->min_pu_width;
1567
1568 MvField *tab_mvf = s->ref->tab_mvf;
1569 RefPicList *refPicList = s->ref->refPicList;
1570 HEVCFrame *ref0, *ref1;
1571
1572 int tmpstride = MAX_PB_SIZE;
1573
1574 uint8_t *dst0 = POS(0, x0, y0);
1575 uint8_t *dst1 = POS(1, x0, y0);
1576 uint8_t *dst2 = POS(2, x0, y0);
1577 int log2_min_cb_size = s->sps->log2_min_cb_size;
1578 int min_cb_width = s->sps->min_cb_width;
1579 int x_cb = x0 >> log2_min_cb_size;
1580 int y_cb = y0 >> log2_min_cb_size;
1581 int ref_idx[2];
1582 int mvp_flag[2];
1583 int x_pu, y_pu;
1584 int i, j;
1585
1586 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1587 if (s->sh.max_num_merge_cand > 1)
1588 merge_idx = ff_hevc_merge_idx_decode(s);
1589 else
1590 merge_idx = 0;
1591
1592 ff_hevc_luma_mv_merge_mode(s, x0, y0,
1593 1 << log2_cb_size,
1594 1 << log2_cb_size,
1595 log2_cb_size, partIdx,
1596 merge_idx, &current_mv);
1597 x_pu = x0 >> s->sps->log2_min_pu_size;
1598 y_pu = y0 >> s->sps->log2_min_pu_size;
1599
1600 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1601 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1602 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1603 } else { /* MODE_INTER */
1604 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1605 if (lc->pu.merge_flag) {
1606 if (s->sh.max_num_merge_cand > 1)
1607 merge_idx = ff_hevc_merge_idx_decode(s);
1608 else
1609 merge_idx = 0;
1610
1611 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1612 partIdx, merge_idx, &current_mv);
1613 x_pu = x0 >> s->sps->log2_min_pu_size;
1614 y_pu = y0 >> s->sps->log2_min_pu_size;
1615
1616 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1617 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1618 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1619 } else {
1620 enum InterPredIdc inter_pred_idc = PRED_L0;
1621 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1622 if (s->sh.slice_type == B_SLICE)
1623 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1624
1625 if (inter_pred_idc != PRED_L1) {
1626 if (s->sh.nb_refs[L0]) {
1627 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1628 current_mv.ref_idx[0] = ref_idx[0];
1629 }
1630 current_mv.pred_flag[0] = 1;
1631 hls_mvd_coding(s, x0, y0, 0);
1632 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1633 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1634 partIdx, merge_idx, &current_mv,
1635 mvp_flag[0], 0);
1636 current_mv.mv[0].x += lc->pu.mvd.x;
1637 current_mv.mv[0].y += lc->pu.mvd.y;
1638 }
1639
1640 if (inter_pred_idc != PRED_L0) {
1641 if (s->sh.nb_refs[L1]) {
1642 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1643 current_mv.ref_idx[1] = ref_idx[1];
1644 }
1645
1646 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1647 lc->pu.mvd.x = 0;
1648 lc->pu.mvd.y = 0;
1649 } else {
1650 hls_mvd_coding(s, x0, y0, 1);
1651 }
1652
1653 current_mv.pred_flag[1] = 1;
1654 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1655 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1656 partIdx, merge_idx, &current_mv,
1657 mvp_flag[1], 1);
1658 current_mv.mv[1].x += lc->pu.mvd.x;
1659 current_mv.mv[1].y += lc->pu.mvd.y;
1660 }
1661
1662 x_pu = x0 >> s->sps->log2_min_pu_size;
1663 y_pu = y0 >> s->sps->log2_min_pu_size;
1664
1665 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1666 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1667 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1668 }
1669 }
1670
1671 if (current_mv.pred_flag[0]) {
1672 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1673 if (!ref0)
1674 return;
1675 hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1676 }
1677 if (current_mv.pred_flag[1]) {
1678 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1679 if (!ref1)
1680 return;
1681 hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1682 }
1683
1684 if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1685 DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1686 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1687
1688 luma_mc(s, tmp, tmpstride, ref0->frame,
1689 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1690
1691 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1692 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1693 s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1694 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1695 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1696 dst0, s->frame->linesize[0], tmp,
1697 tmpstride, nPbW, nPbH);
1698 } else {
1699 s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1700 }
1701 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1702 &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1703
1704 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1705 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1706 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1707 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1708 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1709 dst1, s->frame->linesize[1], tmp, tmpstride,
1710 nPbW / 2, nPbH / 2);
1711 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1712 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1713 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1714 dst2, s->frame->linesize[2], tmp2, tmpstride,
1715 nPbW / 2, nPbH / 2);
1716 } else {
1717 s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1718 s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1719 }
1720 } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1721 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1722 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1723
1724 if (!ref1)
1725 return;
1726
1727 luma_mc(s, tmp, tmpstride, ref1->frame,
1728 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1729
1730 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1731 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1732 s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1733 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1734 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1735 dst0, s->frame->linesize[0], tmp, tmpstride,
1736 nPbW, nPbH);
1737 } else {
1738 s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1739 }
1740
1741 chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1742 &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1743
1744 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1745 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1746 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1747 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1748 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1749 dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1750 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1751 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1752 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1753 dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1754 } else {
1755 s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1756 s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1757 }
1758 } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1759 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1760 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1761 DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1762 DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1763 HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1764 HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1765
1766 if (!ref0 || !ref1)
1767 return;
1768
1769 luma_mc(s, tmp, tmpstride, ref0->frame,
1770 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1771 luma_mc(s, tmp2, tmpstride, ref1->frame,
1772 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1773
1774 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1775 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1776 s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1777 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1778 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1779 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1780 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1781 dst0, s->frame->linesize[0],
1782 tmp, tmp2, tmpstride, nPbW, nPbH);
1783 } else {
1784 s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1785 tmp, tmp2, tmpstride, nPbW, nPbH);
1786 }
1787
1788 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1789 &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1790 chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1791 &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1792
1793 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1794 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1795 s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1796 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1797 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1798 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1799 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1800 dst1, s->frame->linesize[1], tmp, tmp3,
1801 tmpstride, nPbW / 2, nPbH / 2);
1802 s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1803 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1804 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1805 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1806 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1807 dst2, s->frame->linesize[2], tmp2, tmp4,
1808 tmpstride, nPbW / 2, nPbH / 2);
1809 } else {
1810 s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1811 s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1812 }
1813 }
1814}
1815
1816/**
1817 * 8.4.1
1818 */
1819static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1820 int prev_intra_luma_pred_flag)
1821{
1822 HEVCLocalContext *lc = &s->HEVClc;
1823 int x_pu = x0 >> s->sps->log2_min_pu_size;
1824 int y_pu = y0 >> s->sps->log2_min_pu_size;
1825 int min_pu_width = s->sps->min_pu_width;
1826 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
1827 int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1828 int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1829
1830 int cand_up = (lc->ctb_up_flag || y0b) ?
1831 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1832 int cand_left = (lc->ctb_left_flag || x0b) ?
1833 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1834
1835 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1836
1837 MvField *tab_mvf = s->ref->tab_mvf;
1838 int intra_pred_mode;
1839 int candidate[3];
1840 int i, j;
1841
1842 // intra_pred_mode prediction does not cross vertical CTB boundaries
1843 if ((y0 - 1) < y_ctb)
1844 cand_up = INTRA_DC;
1845
1846 if (cand_left == cand_up) {
1847 if (cand_left < 2) {
1848 candidate[0] = INTRA_PLANAR;
1849 candidate[1] = INTRA_DC;
1850 candidate[2] = INTRA_ANGULAR_26;
1851 } else {
1852 candidate[0] = cand_left;
1853 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1854 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1855 }
1856 } else {
1857 candidate[0] = cand_left;
1858 candidate[1] = cand_up;
1859 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1860 candidate[2] = INTRA_PLANAR;
1861 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1862 candidate[2] = INTRA_DC;
1863 } else {
1864 candidate[2] = INTRA_ANGULAR_26;
1865 }
1866 }
1867
1868 if (prev_intra_luma_pred_flag) {
1869 intra_pred_mode = candidate[lc->pu.mpm_idx];
1870 } else {
1871 if (candidate[0] > candidate[1])
1872 FFSWAP(uint8_t, candidate[0], candidate[1]);
1873 if (candidate[0] > candidate[2])
1874 FFSWAP(uint8_t, candidate[0], candidate[2]);
1875 if (candidate[1] > candidate[2])
1876 FFSWAP(uint8_t, candidate[1], candidate[2]);
1877
1878 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1879 for (i = 0; i < 3; i++)
1880 if (intra_pred_mode >= candidate[i])
1881 intra_pred_mode++;
1882 }
1883
1884 /* write the intra prediction units into the mv array */
1885 if (!size_in_pus)
1886 size_in_pus = 1;
1887 for (i = 0; i < size_in_pus; i++) {
1888 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1889 intra_pred_mode, size_in_pus);
1890
1891 for (j = 0; j < size_in_pus; j++) {
1892 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra = 1;
1893 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1894 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1895 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0] = 0;
1896 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1] = 0;
1897 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x = 0;
1898 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y = 0;
1899 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x = 0;
1900 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y = 0;
1901 }
1902 }
1903
1904 return intra_pred_mode;
1905}
1906
1907static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1908 int log2_cb_size, int ct_depth)
1909{
1910 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1911 int x_cb = x0 >> s->sps->log2_min_cb_size;
1912 int y_cb = y0 >> s->sps->log2_min_cb_size;
1913 int y;
1914
1915 for (y = 0; y < length; y++)
1916 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1917 ct_depth, length);
1918}
1919
1920static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1921 int log2_cb_size)
1922{
1923 HEVCLocalContext *lc = &s->HEVClc;
1924 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1925 uint8_t prev_intra_luma_pred_flag[4];
1926 int split = lc->cu.part_mode == PART_NxN;
1927 int pb_size = (1 << log2_cb_size) >> split;
1928 int side = split + 1;
1929 int chroma_mode;
1930 int i, j;
1931
1932 for (i = 0; i < side; i++)
1933 for (j = 0; j < side; j++)
1934 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1935
1936 for (i = 0; i < side; i++) {
1937 for (j = 0; j < side; j++) {
1938 if (prev_intra_luma_pred_flag[2 * i + j])
1939 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1940 else
1941 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1942
1943 lc->pu.intra_pred_mode[2 * i + j] =
1944 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1945 prev_intra_luma_pred_flag[2 * i + j]);
1946 }
1947 }
1948
1949 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1950 if (chroma_mode != 4) {
1951 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1952 lc->pu.intra_pred_mode_c = 34;
1953 else
1954 lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1955 } else {
1956 lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1957 }
1958}
1959
1960static void intra_prediction_unit_default_value(HEVCContext *s,
1961 int x0, int y0,
1962 int log2_cb_size)
1963{
1964 HEVCLocalContext *lc = &s->HEVClc;
1965 int pb_size = 1 << log2_cb_size;
1966 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
1967 int min_pu_width = s->sps->min_pu_width;
1968 MvField *tab_mvf = s->ref->tab_mvf;
1969 int x_pu = x0 >> s->sps->log2_min_pu_size;
1970 int y_pu = y0 >> s->sps->log2_min_pu_size;
1971 int j, k;
1972
1973 if (size_in_pus == 0)
1974 size_in_pus = 1;
1975 for (j = 0; j < size_in_pus; j++) {
1976 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1977 for (k = 0; k < size_in_pus; k++)
1978 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
1979 }
1980}
1981
1982static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1983{
1984 int cb_size = 1 << log2_cb_size;
1985 HEVCLocalContext *lc = &s->HEVClc;
1986 int log2_min_cb_size = s->sps->log2_min_cb_size;
1987 int length = cb_size >> log2_min_cb_size;
1988 int min_cb_width = s->sps->min_cb_width;
1989 int x_cb = x0 >> log2_min_cb_size;
1990 int y_cb = y0 >> log2_min_cb_size;
1991 int x, y;
1992
1993 lc->cu.x = x0;
1994 lc->cu.y = y0;
1995 lc->cu.rqt_root_cbf = 1;
1996 lc->cu.pred_mode = MODE_INTRA;
1997 lc->cu.part_mode = PART_2Nx2N;
1998 lc->cu.intra_split_flag = 0;
1999 lc->cu.pcm_flag = 0;
2000
2001 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2002 for (x = 0; x < 4; x++)
2003 lc->pu.intra_pred_mode[x] = 1;
2004 if (s->pps->transquant_bypass_enable_flag) {
2005 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2006 if (lc->cu.cu_transquant_bypass_flag)
2007 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2008 } else
2009 lc->cu.cu_transquant_bypass_flag = 0;
2010
2011 if (s->sh.slice_type != I_SLICE) {
2012 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2013
2014 lc->cu.pred_mode = MODE_SKIP;
2015 x = y_cb * min_cb_width + x_cb;
2016 for (y = 0; y < length; y++) {
2017 memset(&s->skip_flag[x], skip_flag, length);
2018 x += min_cb_width;
2019 }
2020 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2021 }
2022
2023 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2024 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2025 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2026
2027 if (!s->sh.disable_deblocking_filter_flag)
2028 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2029 lc->slice_or_tiles_up_boundary,
2030 lc->slice_or_tiles_left_boundary);
2031 } else {
2032 if (s->sh.slice_type != I_SLICE)
2033 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2034 if (lc->cu.pred_mode != MODE_INTRA ||
2035 log2_cb_size == s->sps->log2_min_cb_size) {
2036 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2037 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2038 lc->cu.pred_mode == MODE_INTRA;
2039 }
2040
2041 if (lc->cu.pred_mode == MODE_INTRA) {
2042 if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2043 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2044 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2045 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2046 }
2047 if (lc->cu.pcm_flag) {
2048 int ret;
2049 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2050 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2051 if (s->sps->pcm.loop_filter_disable_flag)
2052 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2053
2054 if (ret < 0)
2055 return ret;
2056 } else {
2057 intra_prediction_unit(s, x0, y0, log2_cb_size);
2058 }
2059 } else {
2060 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2061 switch (lc->cu.part_mode) {
2062 case PART_2Nx2N:
2063 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2064 break;
2065 case PART_2NxN:
2066 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0);
2067 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2068 break;
2069 case PART_Nx2N:
2070 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0);
2071 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2072 break;
2073 case PART_2NxnU:
2074 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0);
2075 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2076 break;
2077 case PART_2NxnD:
2078 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2079 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1);
2080 break;
2081 case PART_nLx2N:
2082 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0);
2083 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2084 break;
2085 case PART_nRx2N:
2086 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2087 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1);
2088 break;
2089 case PART_NxN:
2090 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0);
2091 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1);
2092 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2093 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2094 break;
2095 }
2096 }
2097
2098 if (!lc->cu.pcm_flag) {
2099 if (lc->cu.pred_mode != MODE_INTRA &&
2100 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2101 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2102 }
2103 if (lc->cu.rqt_root_cbf) {
2104 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2105 s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2106 s->sps->max_transform_hierarchy_depth_inter;
2107 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2108 log2_cb_size, 0, 0);
2109 } else {
2110 if (!s->sh.disable_deblocking_filter_flag)
2111 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2112 lc->slice_or_tiles_up_boundary,
2113 lc->slice_or_tiles_left_boundary);
2114 }
2115 }
2116 }
2117
2118 if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2119 ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2120
2121 x = y_cb * min_cb_width + x_cb;
2122 for (y = 0; y < length; y++) {
2123 memset(&s->qp_y_tab[x], lc->qp_y, length);
2124 x += min_cb_width;
2125 }
2126
2127 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2128
2129 return 0;
2130}
2131
2132static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2133 int log2_cb_size, int cb_depth)
2134{
2135 HEVCLocalContext *lc = &s->HEVClc;
2136 const int cb_size = 1 << log2_cb_size;
2137
2138 lc->ct.depth = cb_depth;
2139 if (x0 + cb_size <= s->sps->width &&
2140 y0 + cb_size <= s->sps->height &&
2141 log2_cb_size > s->sps->log2_min_cb_size) {
2142 SAMPLE(s->split_cu_flag, x0, y0) =
2143 ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2144 } else {
2145 SAMPLE(s->split_cu_flag, x0, y0) =
2146 (log2_cb_size > s->sps->log2_min_cb_size);
2147 }
2148 if (s->pps->cu_qp_delta_enabled_flag &&
2149 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2150 lc->tu.is_cu_qp_delta_coded = 0;
2151 lc->tu.cu_qp_delta = 0;
2152 }
2153
2154 if (SAMPLE(s->split_cu_flag, x0, y0)) {
2155 const int cb_size_split = cb_size >> 1;
2156 const int x1 = x0 + cb_size_split;
2157 const int y1 = y0 + cb_size_split;
2158
2159 log2_cb_size--;
2160 cb_depth++;
2161
2162#define SUBDIVIDE(x, y) \
2163do { \
2164 if (x < s->sps->width && y < s->sps->height) { \
2165 int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2166 if (ret < 0) \
2167 return ret; \
2168 } \
2169} while (0)
2170
2171 SUBDIVIDE(x0, y0);
2172 SUBDIVIDE(x1, y0);
2173 SUBDIVIDE(x0, y1);
2174 SUBDIVIDE(x1, y1);
2175 } else {
2176 int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2177 if (ret < 0)
2178 return ret;
2179 }
2180
2181 return 0;
2182}
2183
2184static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2185 int ctb_addr_ts)
2186{
2187 HEVCLocalContext *lc = &s->HEVClc;
2188 int ctb_size = 1 << s->sps->log2_ctb_size;
2189 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2190 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2191
2192 int tile_left_boundary, tile_up_boundary;
2193 int slice_left_boundary, slice_up_boundary;
2194
2195 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2196
2197 if (s->pps->entropy_coding_sync_enabled_flag) {
2198 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2199 lc->first_qp_group = 1;
2200 lc->end_of_tiles_x = s->sps->width;
2201 } else if (s->pps->tiles_enabled_flag) {
2202 if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2203 int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2204 lc->start_of_tiles_x = x_ctb;
2205 lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2206 lc->first_qp_group = 1;
2207 }
2208 } else {
2209 lc->end_of_tiles_x = s->sps->width;
2210 }
2211
2212 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2213
2214 if (s->pps->tiles_enabled_flag) {
2215 tile_left_boundary = x_ctb > 0 &&
2216 s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2217 slice_left_boundary = x_ctb > 0 &&
2218 s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2219 tile_up_boundary = y_ctb > 0 &&
2220 s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2221 slice_up_boundary = y_ctb > 0 &&
2222 s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2223 } else {
2224 tile_left_boundary =
2225 tile_up_boundary = 1;
2226 slice_left_boundary = ctb_addr_in_slice > 0;
2227 slice_up_boundary = ctb_addr_in_slice >= s->sps->ctb_width;
2228 }
2229 lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2230 lc->slice_or_tiles_up_boundary = (!slice_up_boundary + (!tile_up_boundary << 1));
2231 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2232 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2233 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2234 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2235}
2236
2237static int hls_slice_data(HEVCContext *s)
2238{
2239 int ctb_size = 1 << s->sps->log2_ctb_size;
2240 int more_data = 1;
2241 int x_ctb = 0;
2242 int y_ctb = 0;
2243 int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2244 int ret;
2245
2246 while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2247 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2248
2249 x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2250 y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2251 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2252
2253 ff_hevc_cabac_init(s, ctb_addr_ts);
2254
2255 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2256
2257 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2258 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2259 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2260
2261 ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2262 if (ret < 0)
2263 return ret;
2264 more_data = !ff_hevc_end_of_slice_flag_decode(s);
2265
2266 ctb_addr_ts++;
2267 ff_hevc_save_states(s, ctb_addr_ts);
2268 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2269 }
2270
2271 if (x_ctb + ctb_size >= s->sps->width &&
2272 y_ctb + ctb_size >= s->sps->height)
2273 ff_hevc_hls_filter(s, x_ctb, y_ctb);
2274
2275 return ctb_addr_ts;
2276}
2277
2278/**
2279 * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2280 * 0 if the unit should be skipped, 1 otherwise
2281 */
2282static int hls_nal_unit(HEVCContext *s)
2283{
2284 GetBitContext *gb = &s->HEVClc.gb;
2285 int nuh_layer_id;
2286
2287 if (get_bits1(gb) != 0)
2288 return AVERROR_INVALIDDATA;
2289
2290 s->nal_unit_type = get_bits(gb, 6);
2291
2292 nuh_layer_id = get_bits(gb, 6);
2293 s->temporal_id = get_bits(gb, 3) - 1;
2294 if (s->temporal_id < 0)
2295 return AVERROR_INVALIDDATA;
2296
2297 av_log(s->avctx, AV_LOG_DEBUG,
2298 "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2299 s->nal_unit_type, nuh_layer_id, s->temporal_id);
2300
2301 return nuh_layer_id == 0;
2302}
2303
2304static void restore_tqb_pixels(HEVCContext *s)
2305{
2306 int min_pu_size = 1 << s->sps->log2_min_pu_size;
2307 int x, y, c_idx;
2308
2309 for (c_idx = 0; c_idx < 3; c_idx++) {
2310 ptrdiff_t stride = s->frame->linesize[c_idx];
2311 int hshift = s->sps->hshift[c_idx];
2312 int vshift = s->sps->vshift[c_idx];
2313 for (y = 0; y < s->sps->min_pu_height; y++) {
2314 for (x = 0; x < s->sps->min_pu_width; x++) {
2315 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2316 int n;
2317 int len = min_pu_size >> hshift;
2318 uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2319 uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2320 for (n = 0; n < (min_pu_size >> vshift); n++) {
2321 memcpy(dst, src, len);
2322 src += stride;
2323 dst += stride;
2324 }
2325 }
2326 }
2327 }
2328 }
2329}
2330
2331static int hevc_frame_start(HEVCContext *s)
2332{
2333 HEVCLocalContext *lc = &s->HEVClc;
2334 int ret;
2335
2336 memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2337 memset(s->vertical_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2338 memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2339 memset(s->is_pcm, 0, s->sps->min_pu_width * s->sps->min_pu_height);
2340
2341 lc->start_of_tiles_x = 0;
2342 s->is_decoded = 0;
2343
2344 if (s->pps->tiles_enabled_flag)
2345 lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2346
2347 ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2348 s->poc);
2349 if (ret < 0)
2350 goto fail;
2351
2352 av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
2353 (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
2354 if (!lc->edge_emu_buffer) {
2355 ret = AVERROR(ENOMEM);
2356 goto fail;
2357 }
2358
2359 ret = ff_hevc_frame_rps(s);
2360 if (ret < 0) {
2361 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2362 goto fail;
2363 }
2364
2365 av_frame_unref(s->output_frame);
2366 ret = ff_hevc_output_frame(s, s->output_frame, 0);
2367 if (ret < 0)
2368 goto fail;
2369
2370 ff_thread_finish_setup(s->avctx);
2371
2372 return 0;
2373
2374fail:
2375 if (s->ref)
2376 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2377 s->ref = NULL;
2378 return ret;
2379}
2380
2381static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2382{
2383 HEVCLocalContext *lc = &s->HEVClc;
2384 GetBitContext *gb = &lc->gb;
2385 int ctb_addr_ts, ret;
2386
2387 ret = init_get_bits8(gb, nal, length);
2388 if (ret < 0)
2389 return ret;
2390
2391 ret = hls_nal_unit(s);
2392 if (ret < 0) {
2393 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2394 s->nal_unit_type);
2395 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2396 return ret;
2397 return 0;
2398 } else if (!ret)
2399 return 0;
2400
2401 switch (s->nal_unit_type) {
2402 case NAL_VPS:
2403 ret = ff_hevc_decode_nal_vps(s);
2404 if (ret < 0)
2405 return ret;
2406 break;
2407 case NAL_SPS:
2408 ret = ff_hevc_decode_nal_sps(s);
2409 if (ret < 0)
2410 return ret;
2411 break;
2412 case NAL_PPS:
2413 ret = ff_hevc_decode_nal_pps(s);
2414 if (ret < 0)
2415 return ret;
2416 break;
2417 case NAL_SEI_PREFIX:
2418 case NAL_SEI_SUFFIX:
2419 ret = ff_hevc_decode_nal_sei(s);
2420 if (ret < 0)
2421 return ret;
2422 break;
2423 case NAL_TRAIL_R:
2424 case NAL_TRAIL_N:
2425 case NAL_TSA_N:
2426 case NAL_TSA_R:
2427 case NAL_STSA_N:
2428 case NAL_STSA_R:
2429 case NAL_BLA_W_LP:
2430 case NAL_BLA_W_RADL:
2431 case NAL_BLA_N_LP:
2432 case NAL_IDR_W_RADL:
2433 case NAL_IDR_N_LP:
2434 case NAL_CRA_NUT:
2435 case NAL_RADL_N:
2436 case NAL_RADL_R:
2437 case NAL_RASL_N:
2438 case NAL_RASL_R:
2439 ret = hls_slice_header(s);
2440 if (ret < 0)
2441 return ret;
2442
2443 if (s->max_ra == INT_MAX) {
2444 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2445 s->max_ra = s->poc;
2446 } else {
2447 if (IS_IDR(s))
2448 s->max_ra = INT_MIN;
2449 }
2450 }
2451
2452 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2453 s->poc <= s->max_ra) {
2454 s->is_decoded = 0;
2455 break;
2456 } else {
2457 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2458 s->max_ra = INT_MIN;
2459 }
2460
2461 if (s->sh.first_slice_in_pic_flag) {
2462 ret = hevc_frame_start(s);
2463 if (ret < 0)
2464 return ret;
2465 } else if (!s->ref) {
2466 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2467 return AVERROR_INVALIDDATA;
2468 }
2469
2470 if (!s->sh.dependent_slice_segment_flag &&
2471 s->sh.slice_type != I_SLICE) {
2472 ret = ff_hevc_slice_rpl(s);
2473 if (ret < 0) {
2474 av_log(s->avctx, AV_LOG_WARNING,
2475 "Error constructing the reference lists for the current slice.\n");
2476 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2477 return ret;
2478 }
2479 }
2480
2481 ctb_addr_ts = hls_slice_data(s);
2482 if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2483 s->is_decoded = 1;
2484 if ((s->pps->transquant_bypass_enable_flag ||
2485 (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2486 s->sps->sao_enabled)
2487 restore_tqb_pixels(s);
2488 }
2489
2490 if (ctb_addr_ts < 0)
2491 return ctb_addr_ts;
2492 break;
2493 case NAL_EOS_NUT:
2494 case NAL_EOB_NUT:
2495 s->seq_decode = (s->seq_decode + 1) & 0xff;
2496 s->max_ra = INT_MAX;
2497 break;
2498 case NAL_AUD:
2499 case NAL_FD_NUT:
2500 break;
2501 default:
2502 av_log(s->avctx, AV_LOG_INFO,
2503 "Skipping NAL unit %d\n", s->nal_unit_type);
2504 }
2505
2506 return 0;
2507}
2508
2509/* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2510 * between these functions would be nice. */
2511static int extract_rbsp(const uint8_t *src, int length,
2512 HEVCNAL *nal)
2513{
2514 int i, si, di;
2515 uint8_t *dst;
2516
2517#define STARTCODE_TEST \
2518 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
2519 if (src[i + 2] != 3) { \
2520 /* startcode, so we must be past the end */ \
2521 length = i; \
2522 } \
2523 break; \
2524 }
2525#if HAVE_FAST_UNALIGNED
2526#define FIND_FIRST_ZERO \
2527 if (i > 0 && !src[i]) \
2528 i--; \
2529 while (src[i]) \
2530 i++
2531#if HAVE_FAST_64BIT
2532 for (i = 0; i + 1 < length; i += 9) {
2533 if (!((~AV_RN64A(src + i) &
2534 (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2535 0x8000800080008080ULL))
2536 continue;
2537 FIND_FIRST_ZERO;
2538 STARTCODE_TEST;
2539 i -= 7;
2540 }
2541#else
2542 for (i = 0; i + 1 < length; i += 5) {
2543 if (!((~AV_RN32A(src + i) &
2544 (AV_RN32A(src + i) - 0x01000101U)) &
2545 0x80008080U))
2546 continue;
2547 FIND_FIRST_ZERO;
2548 STARTCODE_TEST;
2549 i -= 3;
2550 }
2551#endif /* HAVE_FAST_64BIT */
2552#else
2553 for (i = 0; i + 1 < length; i += 2) {
2554 if (src[i])
2555 continue;
2556 if (i > 0 && src[i - 1] == 0)
2557 i--;
2558 STARTCODE_TEST;
2559 }
2560#endif /* HAVE_FAST_UNALIGNED */
2561
2562 if (i >= length - 1) { // no escaped 0
2563 nal->data = src;
2564 nal->size = length;
2565 return length;
2566 }
2567
2568 av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2569 length + FF_INPUT_BUFFER_PADDING_SIZE);
2570 if (!nal->rbsp_buffer)
2571 return AVERROR(ENOMEM);
2572
2573 dst = nal->rbsp_buffer;
2574
2575 memcpy(dst, src, i);
2576 si = di = i;
2577 while (si + 2 < length) {
2578 // remove escapes (very rare 1:2^22)
2579 if (src[si + 2] > 3) {
2580 dst[di++] = src[si++];
2581 dst[di++] = src[si++];
2582 } else if (src[si] == 0 && src[si + 1] == 0) {
2583 if (src[si + 2] == 3) { // escape
2584 dst[di++] = 0;
2585 dst[di++] = 0;
2586 si += 3;
2587
2588 continue;
2589 } else // next start code
2590 goto nsc;
2591 }
2592
2593 dst[di++] = src[si++];
2594 }
2595 while (si < length)
2596 dst[di++] = src[si++];
2597
2598nsc:
2599 memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2600
2601 nal->data = dst;
2602 nal->size = di;
2603 return si;
2604}
2605
2606static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2607{
2608 int i, consumed, ret = 0;
2609
2610 s->ref = NULL;
2611 s->eos = 0;
2612
2613 /* split the input packet into NAL units, so we know the upper bound on the
2614 * number of slices in the frame */
2615 s->nb_nals = 0;
2616 while (length >= 4) {
2617 HEVCNAL *nal;
2618 int extract_length = 0;
2619
2620 if (s->is_nalff) {
2621 int i;
2622 for (i = 0; i < s->nal_length_size; i++)
2623 extract_length = (extract_length << 8) | buf[i];
2624 buf += s->nal_length_size;
2625 length -= s->nal_length_size;
2626
2627 if (extract_length > length) {
2628 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2629 ret = AVERROR_INVALIDDATA;
2630 goto fail;
2631 }
2632 } else {
2633 if (buf[2] == 0) {
2634 length--;
2635 buf++;
2636 continue;
2637 }
2638 if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2639 ret = AVERROR_INVALIDDATA;
2640 goto fail;
2641 }
2642
2643 buf += 3;
2644 length -= 3;
2645 extract_length = length;
2646 }
2647
2648 if (s->nals_allocated < s->nb_nals + 1) {
2649 int new_size = s->nals_allocated + 1;
2650 HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2651 if (!tmp) {
2652 ret = AVERROR(ENOMEM);
2653 goto fail;
2654 }
2655 s->nals = tmp;
2656 memset(s->nals + s->nals_allocated, 0,
2657 (new_size - s->nals_allocated) * sizeof(*tmp));
2658 s->nals_allocated = new_size;
2659 }
2660 nal = &s->nals[s->nb_nals++];
2661
2662 consumed = extract_rbsp(buf, extract_length, nal);
2663 if (consumed < 0) {
2664 ret = consumed;
2665 goto fail;
2666 }
2667
2668 ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2669 if (ret < 0)
2670 goto fail;
2671 hls_nal_unit(s);
2672
2673 if (s->nal_unit_type == NAL_EOB_NUT ||
2674 s->nal_unit_type == NAL_EOS_NUT)
2675 s->eos = 1;
2676
2677 buf += consumed;
2678 length -= consumed;
2679 }
2680
2681 /* parse the NAL units */
2682 for (i = 0; i < s->nb_nals; i++) {
2683 int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2684 if (ret < 0) {
2685 av_log(s->avctx, AV_LOG_WARNING,
2686 "Error parsing NAL unit #%d.\n", i);
2687 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2688 goto fail;
2689 }
2690 }
2691
2692fail:
2693 if (s->ref)
2694 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2695
2696 return ret;
2697}
2698
/**
 * Log the 16 bytes of an MD5 digest as lowercase hex at the given level.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2705
2706static int verify_md5(HEVCContext *s, AVFrame *frame)
2707{
2708 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2709 int pixel_shift = desc->comp[0].depth_minus1 > 7;
2710 int i, j;
2711
2712 if (!desc)
2713 return AVERROR(EINVAL);
2714
2715 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2716 s->poc);
2717
2718 /* the checksums are LE, so we have to byteswap for >8bpp formats
2719 * on BE arches */
2720#if HAVE_BIGENDIAN
2721 if (pixel_shift && !s->checksum_buf) {
2722 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2723 FFMAX3(frame->linesize[0], frame->linesize[1],
2724 frame->linesize[2]));
2725 if (!s->checksum_buf)
2726 return AVERROR(ENOMEM);
2727 }
2728#endif
2729
2730 for (i = 0; frame->data[i]; i++) {
2731 int width = s->avctx->coded_width;
2732 int height = s->avctx->coded_height;
2733 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2734 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2735 uint8_t md5[16];
2736
2737 av_md5_init(s->md5_ctx);
2738 for (j = 0; j < h; j++) {
2739 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2740#if HAVE_BIGENDIAN
2741 if (pixel_shift) {
2742 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2743 (const uint16_t*)src, w);
2744 src = s->checksum_buf;
2745 }
2746#endif
2747 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2748 }
2749 av_md5_final(s->md5_ctx, md5);
2750
2751 if (!memcmp(md5, s->md5[i], 16)) {
2752 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2753 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2754 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2755 } else {
2756 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2757 print_md5(s->avctx, AV_LOG_ERROR, md5);
2758 av_log (s->avctx, AV_LOG_ERROR, " != ");
2759 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2760 av_log (s->avctx, AV_LOG_ERROR, "\n");
2761 return AVERROR_INVALIDDATA;
2762 }
2763 }
2764
2765 av_log(s->avctx, AV_LOG_DEBUG, "\n");
2766
2767 return 0;
2768}
2769
2770static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2771 AVPacket *avpkt)
2772{
2773 int ret;
2774 HEVCContext *s = avctx->priv_data;
2775
2776 if (!avpkt->size) {
2777 ret = ff_hevc_output_frame(s, data, 1);
2778 if (ret < 0)
2779 return ret;
2780
2781 *got_output = ret;
2782 return 0;
2783 }
2784
2785 s->ref = NULL;
2786 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2787 if (ret < 0)
2788 return ret;
2789
2790 /* verify the SEI checksum */
2791 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2792 s->is_md5) {
2793 ret = verify_md5(s, s->ref->frame);
2794 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2795 ff_hevc_unref_frame(s, s->ref, ~0);
2796 return ret;
2797 }
2798 }
2799 s->is_md5 = 0;
2800
2801 if (s->is_decoded) {
2802 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2803 s->is_decoded = 0;
2804 }
2805
2806 if (s->output_frame->buf[0]) {
2807 av_frame_move_ref(data, s->output_frame);
2808 *got_output = 1;
2809 }
2810
2811 return avpkt->size;
2812}
2813
2814static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2815{
2816 int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2817 if (ret < 0)
2818 return ret;
2819
2820 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2821 if (!dst->tab_mvf_buf)
2822 goto fail;
2823 dst->tab_mvf = src->tab_mvf;
2824
2825 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2826 if (!dst->rpl_tab_buf)
2827 goto fail;
2828 dst->rpl_tab = src->rpl_tab;
2829
2830 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2831 if (!dst->rpl_buf)
2832 goto fail;
2833
2834 dst->poc = src->poc;
2835 dst->ctb_count = src->ctb_count;
2836 dst->window = src->window;
2837 dst->flags = src->flags;
2838 dst->sequence = src->sequence;
2839
2840 return 0;
2841fail:
2842 ff_hevc_unref_frame(s, dst, ~0);
2843 return AVERROR(ENOMEM);
2844}
2845
2846static av_cold int hevc_decode_free(AVCodecContext *avctx)
2847{
2848 HEVCContext *s = avctx->priv_data;
2849 HEVCLocalContext *lc = &s->HEVClc;
2850 int i;
2851
2852 pic_arrays_free(s);
2853
2854 av_freep(&lc->edge_emu_buffer);
2855 av_freep(&s->md5_ctx);
2856
2857 av_frame_free(&s->tmp_frame);
2858 av_frame_free(&s->output_frame);
2859
2860 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2861 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2862 av_frame_free(&s->DPB[i].frame);
2863 }
2864
2865 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2866 av_freep(&s->vps_list[i]);
2867 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2868 av_buffer_unref(&s->sps_list[i]);
2869 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2870 av_buffer_unref(&s->pps_list[i]);
2871
2872 for (i = 0; i < s->nals_allocated; i++)
2873 av_freep(&s->nals[i].rbsp_buffer);
2874 av_freep(&s->nals);
2875 s->nals_allocated = 0;
2876
2877 return 0;
2878}
2879
2880static av_cold int hevc_init_context(AVCodecContext *avctx)
2881{
2882 HEVCContext *s = avctx->priv_data;
2883 int i;
2884
2885 s->avctx = avctx;
2886
2887 s->tmp_frame = av_frame_alloc();
2888 if (!s->tmp_frame)
2889 goto fail;
2890
2891 s->output_frame = av_frame_alloc();
2892 if (!s->output_frame)
2893 goto fail;
2894
2895 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2896 s->DPB[i].frame = av_frame_alloc();
2897 if (!s->DPB[i].frame)
2898 goto fail;
2899 s->DPB[i].tf.f = s->DPB[i].frame;
2900 }
2901
2902 s->max_ra = INT_MAX;
2903
2904 s->md5_ctx = av_md5_alloc();
2905 if (!s->md5_ctx)
2906 goto fail;
2907
2908 ff_dsputil_init(&s->dsp, avctx);
2909
2910 s->context_initialized = 1;
2911
2912 return 0;
2913
2914fail:
2915 hevc_decode_free(avctx);
2916 return AVERROR(ENOMEM);
2917}
2918
2919static int hevc_update_thread_context(AVCodecContext *dst,
2920 const AVCodecContext *src)
2921{
2922 HEVCContext *s = dst->priv_data;
2923 HEVCContext *s0 = src->priv_data;
2924 int i, ret;
2925
2926 if (!s->context_initialized) {
2927 ret = hevc_init_context(dst);
2928 if (ret < 0)
2929 return ret;
2930 }
2931
2932 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2933 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2934 if (s0->DPB[i].frame->buf[0]) {
2935 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2936 if (ret < 0)
2937 return ret;
2938 }
2939 }
2940
2941 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
2942 av_buffer_unref(&s->sps_list[i]);
2943 if (s0->sps_list[i]) {
2944 s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
2945 if (!s->sps_list[i])
2946 return AVERROR(ENOMEM);
2947 }
2948 }
2949
2950 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
2951 av_buffer_unref(&s->pps_list[i]);
2952 if (s0->pps_list[i]) {
2953 s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
2954 if (!s->pps_list[i])
2955 return AVERROR(ENOMEM);
2956 }
2957 }
2958
2959 if (s->sps != s0->sps)
2960 ret = set_sps(s, s0->sps);
2961
2962 s->seq_decode = s0->seq_decode;
2963 s->seq_output = s0->seq_output;
2964 s->pocTid0 = s0->pocTid0;
2965 s->max_ra = s0->max_ra;
2966
2967 s->is_nalff = s0->is_nalff;
2968 s->nal_length_size = s0->nal_length_size;
2969
2970 if (s0->eos) {
2971 s->seq_decode = (s->seq_decode + 1) & 0xff;
2972 s->max_ra = INT_MAX;
2973 }
2974
2975 return 0;
2976}
2977
2978static int hevc_decode_extradata(HEVCContext *s)
2979{
2980 AVCodecContext *avctx = s->avctx;
2981 GetByteContext gb;
2982 int ret;
2983
2984 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2985
2986 if (avctx->extradata_size > 3 &&
2987 (avctx->extradata[0] || avctx->extradata[1] ||
2988 avctx->extradata[2] > 1)) {
2989 /* It seems the extradata is encoded as hvcC format.
2990 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2991 * is finalized. When finalized, configurationVersion will be 1 and we
2992 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2993 int i, j, num_arrays, nal_len_size;
2994
2995 s->is_nalff = 1;
2996
2997 bytestream2_skip(&gb, 21);
2998 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2999 num_arrays = bytestream2_get_byte(&gb);
3000
3001 /* nal units in the hvcC always have length coded with 2 bytes,
3002 * so put a fake nal_length_size = 2 while parsing them */
3003 s->nal_length_size = 2;
3004
3005 /* Decode nal units from hvcC. */
3006 for (i = 0; i < num_arrays; i++) {
3007 int type = bytestream2_get_byte(&gb) & 0x3f;
3008 int cnt = bytestream2_get_be16(&gb);
3009
3010 for (j = 0; j < cnt; j++) {
3011 // +2 for the nal size field
3012 int nalsize = bytestream2_peek_be16(&gb) + 2;
3013 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3014 av_log(s->avctx, AV_LOG_ERROR,
3015 "Invalid NAL unit size in extradata.\n");
3016 return AVERROR_INVALIDDATA;
3017 }
3018
3019 ret = decode_nal_units(s, gb.buffer, nalsize);
3020 if (ret < 0) {
3021 av_log(avctx, AV_LOG_ERROR,
3022 "Decoding nal unit %d %d from hvcC failed\n",
3023 type, i);
3024 return ret;
3025 }
3026 bytestream2_skip(&gb, nalsize);
3027 }
3028 }
3029
3030 /* Now store right nal length size, that will be used to parse
3031 * all other nals */
3032 s->nal_length_size = nal_len_size;
3033 } else {
3034 s->is_nalff = 0;
3035 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3036 if (ret < 0)
3037 return ret;
3038 }
3039 return 0;
3040}
3041
3042static av_cold int hevc_decode_init(AVCodecContext *avctx)
3043{
3044 HEVCContext *s = avctx->priv_data;
3045 int ret;
3046
3047 ff_init_cabac_states();
3048
3049 avctx->internal->allocate_progress = 1;
3050
3051 ret = hevc_init_context(avctx);
3052 if (ret < 0)
3053 return ret;
3054
3055 if (avctx->extradata_size > 0 && avctx->extradata) {
3056 ret = hevc_decode_extradata(s);
3057 if (ret < 0) {
3058 hevc_decode_free(avctx);
3059 return ret;
3060 }
3061 }
3062
3063 return 0;
3064}
3065
3066static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3067{
3068 HEVCContext *s = avctx->priv_data;
3069 int ret;
3070
3071 memset(s, 0, sizeof(*s));
3072
3073 ret = hevc_init_context(avctx);
3074 if (ret < 0)
3075 return ret;
3076
3077 return 0;
3078}
3079
3080static void hevc_decode_flush(AVCodecContext *avctx)
3081{
3082 HEVCContext *s = avctx->priv_data;
3083 ff_hevc_flush_dpb(s);
3084 s->max_ra = INT_MAX;
3085}
3086
3087#define OFFSET(x) offsetof(HEVCContext, x)
3088#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3089static const AVOption options[] = {
3090 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3091 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3092 { NULL },
3093};
3094
3095static const AVClass hevc_decoder_class = {
3096 .class_name = "HEVC decoder",
3097 .item_name = av_default_item_name,
3098 .option = options,
3099 .version = LIBAVUTIL_VERSION_INT,
3100};
3101
3102AVCodec ff_hevc_decoder = {
3103 .name = "hevc",
3104 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3105 .type = AVMEDIA_TYPE_VIDEO,
3106 .id = AV_CODEC_ID_HEVC,
3107 .priv_data_size = sizeof(HEVCContext),
3108 .priv_class = &hevc_decoder_class,
3109 .init = hevc_decode_init,
3110 .close = hevc_decode_free,
3111 .decode = hevc_decode_frame,
3112 .flush = hevc_decode_flush,
3113 .update_thread_context = hevc_update_thread_context,
3114 .init_thread_copy = hevc_init_thread_copy,
3115 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3116 CODEC_CAP_FRAME_THREADS,
3117};