h2645_parse: add support for parsing h264
[libav.git] / libavcodec / hevc.c
1 /*
2 * HEVC video decoder
3 *
4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
8 *
9 * This file is part of Libav.
10 *
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40 #include "profiles.h"
41
/*
 * Per-index extra sample counts for quarter-pel interpolation.
 * For every index, extra[i] == extra_before[i] + extra_after[i].
 */
const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 };
const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 4, 4, 4 };
const uint8_t ff_hevc_qpel_extra[4]        = { 0, 7, 7, 7 };

/* Degenerate scan for a single coefficient group. */
static const uint8_t scan_1x1[1] = { 0 };

/* Raster (row by row) scan of a 2x2 grid: x and y of the n-th position. */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };

/* Raster (row by row) scan of a 4x4 grid: x and y of the n-th position. */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3
};

/* Inverse horizontal scan of an 8x8 block: [y][x] -> scan position. */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3, 16, 17, 18, 19 },
    {  4,  5,  6,  7, 20, 21, 22, 23 },
    {  8,  9, 10, 11, 24, 25, 26, 27 },
    { 12, 13, 14, 15, 28, 29, 30, 31 },
    { 32, 33, 34, 35, 48, 49, 50, 51 },
    { 36, 37, 38, 39, 52, 53, 54, 55 },
    { 40, 41, 42, 43, 56, 57, 58, 59 },
    { 44, 45, 46, 47, 60, 61, 62, 63 }
};

/* Diagonal scan of a 2x2 grid: x and y of the n-th position. */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

/* Inverse diagonal scans: [y][x] -> scan position. */
static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 }
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 }
};

static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 }
};
103
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
108
109 /**
110 * Section 5.7
111 */
112
113 /* free everything allocated by pic_arrays_init() */
114 static void pic_arrays_free(HEVCContext *s)
115 {
116 av_freep(&s->sao);
117 av_freep(&s->deblock);
118
119 av_freep(&s->skip_flag);
120 av_freep(&s->tab_ct_depth);
121
122 av_freep(&s->tab_ipm);
123 av_freep(&s->cbf_luma);
124 av_freep(&s->is_pcm);
125
126 av_freep(&s->qp_y_tab);
127 av_freep(&s->tab_slice_address);
128 av_freep(&s->filter_slice_edges);
129
130 av_freep(&s->horizontal_bs);
131 av_freep(&s->vertical_bs);
132
133 av_buffer_pool_uninit(&s->tab_mvf_pool);
134 av_buffer_pool_uninit(&s->rpl_tab_pool);
135 }
136
137 /* allocate arrays that depend on frame dimensions */
138 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
139 {
140 int log2_min_cb_size = sps->log2_min_cb_size;
141 int width = sps->width;
142 int height = sps->height;
143 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
144 ((height >> log2_min_cb_size) + 1);
145 int ctb_count = sps->ctb_width * sps->ctb_height;
146 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
147
148 s->bs_width = width >> 3;
149 s->bs_height = height >> 3;
150
151 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
152 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
153 if (!s->sao || !s->deblock)
154 goto fail;
155
156 s->skip_flag = av_malloc(pic_size_in_ctb);
157 s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
158 if (!s->skip_flag || !s->tab_ct_depth)
159 goto fail;
160
161 s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
162 s->tab_ipm = av_mallocz(min_pu_size);
163 s->is_pcm = av_malloc(min_pu_size);
164 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
165 goto fail;
166
167 s->filter_slice_edges = av_malloc(ctb_count);
168 s->tab_slice_address = av_malloc(pic_size_in_ctb *
169 sizeof(*s->tab_slice_address));
170 s->qp_y_tab = av_malloc(pic_size_in_ctb *
171 sizeof(*s->qp_y_tab));
172 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
173 goto fail;
174
175 s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
176 s->vertical_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
177 if (!s->horizontal_bs || !s->vertical_bs)
178 goto fail;
179
180 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
181 av_buffer_alloc);
182 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
183 av_buffer_allocz);
184 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
185 goto fail;
186
187 return 0;
188
189 fail:
190 pic_arrays_free(s);
191 return AVERROR(ENOMEM);
192 }
193
194 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
195 {
196 int i = 0;
197 int j = 0;
198 uint8_t luma_weight_l0_flag[16];
199 uint8_t chroma_weight_l0_flag[16];
200 uint8_t luma_weight_l1_flag[16];
201 uint8_t chroma_weight_l1_flag[16];
202
203 s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
204 if (s->ps.sps->chroma_format_idc != 0) {
205 int delta = get_se_golomb(gb);
206 s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
207 }
208
209 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
210 luma_weight_l0_flag[i] = get_bits1(gb);
211 if (!luma_weight_l0_flag[i]) {
212 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
213 s->sh.luma_offset_l0[i] = 0;
214 }
215 }
216 if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
217 for (i = 0; i < s->sh.nb_refs[L0]; i++)
218 chroma_weight_l0_flag[i] = get_bits1(gb);
219 } else {
220 for (i = 0; i < s->sh.nb_refs[L0]; i++)
221 chroma_weight_l0_flag[i] = 0;
222 }
223 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
224 if (luma_weight_l0_flag[i]) {
225 int delta_luma_weight_l0 = get_se_golomb(gb);
226 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
227 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
228 }
229 if (chroma_weight_l0_flag[i]) {
230 for (j = 0; j < 2; j++) {
231 int delta_chroma_weight_l0 = get_se_golomb(gb);
232 int delta_chroma_offset_l0 = get_se_golomb(gb);
233 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
234 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
235 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
236 }
237 } else {
238 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
239 s->sh.chroma_offset_l0[i][0] = 0;
240 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
241 s->sh.chroma_offset_l0[i][1] = 0;
242 }
243 }
244 if (s->sh.slice_type == B_SLICE) {
245 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
246 luma_weight_l1_flag[i] = get_bits1(gb);
247 if (!luma_weight_l1_flag[i]) {
248 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
249 s->sh.luma_offset_l1[i] = 0;
250 }
251 }
252 if (s->ps.sps->chroma_format_idc != 0) {
253 for (i = 0; i < s->sh.nb_refs[L1]; i++)
254 chroma_weight_l1_flag[i] = get_bits1(gb);
255 } else {
256 for (i = 0; i < s->sh.nb_refs[L1]; i++)
257 chroma_weight_l1_flag[i] = 0;
258 }
259 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
260 if (luma_weight_l1_flag[i]) {
261 int delta_luma_weight_l1 = get_se_golomb(gb);
262 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
263 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
264 }
265 if (chroma_weight_l1_flag[i]) {
266 for (j = 0; j < 2; j++) {
267 int delta_chroma_weight_l1 = get_se_golomb(gb);
268 int delta_chroma_offset_l1 = get_se_golomb(gb);
269 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
270 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
271 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
272 }
273 } else {
274 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
275 s->sh.chroma_offset_l1[i][0] = 0;
276 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
277 s->sh.chroma_offset_l1[i][1] = 0;
278 }
279 }
280 }
281 }
282
283 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
284 {
285 const HEVCSPS *sps = s->ps.sps;
286 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
287 int prev_delta_msb = 0;
288 unsigned int nb_sps = 0, nb_sh;
289 int i;
290
291 rps->nb_refs = 0;
292 if (!sps->long_term_ref_pics_present_flag)
293 return 0;
294
295 if (sps->num_long_term_ref_pics_sps > 0)
296 nb_sps = get_ue_golomb_long(gb);
297 nb_sh = get_ue_golomb_long(gb);
298
299 if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
300 return AVERROR_INVALIDDATA;
301
302 rps->nb_refs = nb_sh + nb_sps;
303
304 for (i = 0; i < rps->nb_refs; i++) {
305 uint8_t delta_poc_msb_present;
306
307 if (i < nb_sps) {
308 uint8_t lt_idx_sps = 0;
309
310 if (sps->num_long_term_ref_pics_sps > 1)
311 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
312
313 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
314 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
315 } else {
316 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
317 rps->used[i] = get_bits1(gb);
318 }
319
320 delta_poc_msb_present = get_bits1(gb);
321 if (delta_poc_msb_present) {
322 int delta = get_ue_golomb_long(gb);
323
324 if (i && i != nb_sps)
325 delta += prev_delta_msb;
326
327 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
328 prev_delta_msb = delta;
329 }
330 }
331
332 return 0;
333 }
334
335 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
336 const HEVCSPS *sps)
337 {
338 const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
339 unsigned int num = 0, den = 0;
340
341 avctx->pix_fmt = sps->pix_fmt;
342 avctx->coded_width = sps->width;
343 avctx->coded_height = sps->height;
344 avctx->width = sps->output_width;
345 avctx->height = sps->output_height;
346 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
347 avctx->profile = sps->ptl.general_ptl.profile_idc;
348 avctx->level = sps->ptl.general_ptl.level_idc;
349
350 ff_set_sar(avctx, sps->vui.sar);
351
352 if (sps->vui.video_signal_type_present_flag)
353 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
354 : AVCOL_RANGE_MPEG;
355 else
356 avctx->color_range = AVCOL_RANGE_MPEG;
357
358 if (sps->vui.colour_description_present_flag) {
359 avctx->color_primaries = sps->vui.colour_primaries;
360 avctx->color_trc = sps->vui.transfer_characteristic;
361 avctx->colorspace = sps->vui.matrix_coeffs;
362 } else {
363 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
364 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
365 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
366 }
367
368 if (vps->vps_timing_info_present_flag) {
369 num = vps->vps_num_units_in_tick;
370 den = vps->vps_time_scale;
371 } else if (sps->vui.vui_timing_info_present_flag) {
372 num = sps->vui.vui_num_units_in_tick;
373 den = sps->vui.vui_time_scale;
374 }
375
376 if (num != 0 && den != 0)
377 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
378 num, den, 1 << 30);
379 }
380
381 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
382 {
383 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
384 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
385 int ret;
386
387 pic_arrays_free(s);
388 s->ps.sps = NULL;
389 s->ps.vps = NULL;
390
391 if (!sps)
392 return 0;
393
394 ret = pic_arrays_init(s, sps);
395 if (ret < 0)
396 goto fail;
397
398 export_stream_params(s->avctx, &s->ps, sps);
399
400 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
401 #if CONFIG_HEVC_DXVA2_HWACCEL
402 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
403 #endif
404 #if CONFIG_HEVC_D3D11VA_HWACCEL
405 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
406 #endif
407 #if CONFIG_HEVC_VDPAU_HWACCEL
408 *fmt++ = AV_PIX_FMT_VDPAU;
409 #endif
410 }
411
412 *fmt++ = sps->pix_fmt;
413 *fmt = AV_PIX_FMT_NONE;
414
415 ret = ff_get_format(s->avctx, pix_fmts);
416 if (ret < 0)
417 goto fail;
418 s->avctx->pix_fmt = ret;
419
420 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
421 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
422 ff_videodsp_init (&s->vdsp, sps->bit_depth);
423
424 if (sps->sao_enabled && !s->avctx->hwaccel) {
425 av_frame_unref(s->tmp_frame);
426 ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
427 if (ret < 0)
428 goto fail;
429 s->frame = s->tmp_frame;
430 }
431
432 s->ps.sps = sps;
433 s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
434
435 return 0;
436
437 fail:
438 pic_arrays_free(s);
439 s->ps.sps = NULL;
440 return ret;
441 }
442
443 static int hls_slice_header(HEVCContext *s)
444 {
445 GetBitContext *gb = &s->HEVClc.gb;
446 SliceHeader *sh = &s->sh;
447 int i, ret;
448
449 // Coded parameters
450 sh->first_slice_in_pic_flag = get_bits1(gb);
451 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
452 s->seq_decode = (s->seq_decode + 1) & 0xff;
453 s->max_ra = INT_MAX;
454 if (IS_IDR(s))
455 ff_hevc_clear_refs(s);
456 }
457 if (IS_IRAP(s))
458 sh->no_output_of_prior_pics_flag = get_bits1(gb);
459
460 sh->pps_id = get_ue_golomb_long(gb);
461 if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
462 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
463 return AVERROR_INVALIDDATA;
464 }
465 if (!sh->first_slice_in_pic_flag &&
466 s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
467 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
468 return AVERROR_INVALIDDATA;
469 }
470 s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
471
472 if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
473 s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
474
475 ff_hevc_clear_refs(s);
476 ret = set_sps(s, s->ps.sps);
477 if (ret < 0)
478 return ret;
479
480 s->seq_decode = (s->seq_decode + 1) & 0xff;
481 s->max_ra = INT_MAX;
482 }
483
484 sh->dependent_slice_segment_flag = 0;
485 if (!sh->first_slice_in_pic_flag) {
486 int slice_address_length;
487
488 if (s->ps.pps->dependent_slice_segments_enabled_flag)
489 sh->dependent_slice_segment_flag = get_bits1(gb);
490
491 slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
492 s->ps.sps->ctb_height);
493 sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
494 if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
495 av_log(s->avctx, AV_LOG_ERROR,
496 "Invalid slice segment address: %u.\n",
497 sh->slice_segment_addr);
498 return AVERROR_INVALIDDATA;
499 }
500
501 if (!sh->dependent_slice_segment_flag) {
502 sh->slice_addr = sh->slice_segment_addr;
503 s->slice_idx++;
504 }
505 } else {
506 sh->slice_segment_addr = sh->slice_addr = 0;
507 s->slice_idx = 0;
508 s->slice_initialized = 0;
509 }
510
511 if (!sh->dependent_slice_segment_flag) {
512 s->slice_initialized = 0;
513
514 for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
515 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
516
517 sh->slice_type = get_ue_golomb_long(gb);
518 if (!(sh->slice_type == I_SLICE ||
519 sh->slice_type == P_SLICE ||
520 sh->slice_type == B_SLICE)) {
521 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
522 sh->slice_type);
523 return AVERROR_INVALIDDATA;
524 }
525 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
526 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
527 return AVERROR_INVALIDDATA;
528 }
529
530 // when flag is not present, picture is inferred to be output
531 sh->pic_output_flag = 1;
532 if (s->ps.pps->output_flag_present_flag)
533 sh->pic_output_flag = get_bits1(gb);
534
535 if (s->ps.sps->separate_colour_plane_flag)
536 sh->colour_plane_id = get_bits(gb, 2);
537
538 if (!IS_IDR(s)) {
539 int poc, pos;
540
541 sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
542 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
543 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
544 av_log(s->avctx, AV_LOG_WARNING,
545 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
546 if (s->avctx->err_recognition & AV_EF_EXPLODE)
547 return AVERROR_INVALIDDATA;
548 poc = s->poc;
549 }
550 s->poc = poc;
551
552 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
553 pos = get_bits_left(gb);
554 if (!sh->short_term_ref_pic_set_sps_flag) {
555 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
556 if (ret < 0)
557 return ret;
558
559 sh->short_term_rps = &sh->slice_rps;
560 } else {
561 int numbits, rps_idx;
562
563 if (!s->ps.sps->nb_st_rps) {
564 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
565 return AVERROR_INVALIDDATA;
566 }
567
568 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
569 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
570 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
571 }
572 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
573
574 pos = get_bits_left(gb);
575 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
576 if (ret < 0) {
577 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
578 if (s->avctx->err_recognition & AV_EF_EXPLODE)
579 return AVERROR_INVALIDDATA;
580 }
581 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
582
583 if (s->ps.sps->sps_temporal_mvp_enabled_flag)
584 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
585 else
586 sh->slice_temporal_mvp_enabled_flag = 0;
587 } else {
588 s->sh.short_term_rps = NULL;
589 s->poc = 0;
590 }
591
592 /* 8.3.1 */
593 if (s->temporal_id == 0 &&
594 s->nal_unit_type != NAL_TRAIL_N &&
595 s->nal_unit_type != NAL_TSA_N &&
596 s->nal_unit_type != NAL_STSA_N &&
597 s->nal_unit_type != NAL_RADL_N &&
598 s->nal_unit_type != NAL_RADL_R &&
599 s->nal_unit_type != NAL_RASL_N &&
600 s->nal_unit_type != NAL_RASL_R)
601 s->pocTid0 = s->poc;
602
603 if (s->ps.sps->sao_enabled) {
604 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
605 sh->slice_sample_adaptive_offset_flag[1] =
606 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
607 } else {
608 sh->slice_sample_adaptive_offset_flag[0] = 0;
609 sh->slice_sample_adaptive_offset_flag[1] = 0;
610 sh->slice_sample_adaptive_offset_flag[2] = 0;
611 }
612
613 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
614 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
615 int nb_refs;
616
617 sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
618 if (sh->slice_type == B_SLICE)
619 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
620
621 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
622 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
623 if (sh->slice_type == B_SLICE)
624 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
625 }
626 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
627 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
628 sh->nb_refs[L0], sh->nb_refs[L1]);
629 return AVERROR_INVALIDDATA;
630 }
631
632 sh->rpl_modification_flag[0] = 0;
633 sh->rpl_modification_flag[1] = 0;
634 nb_refs = ff_hevc_frame_nb_refs(s);
635 if (!nb_refs) {
636 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
637 return AVERROR_INVALIDDATA;
638 }
639
640 if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
641 sh->rpl_modification_flag[0] = get_bits1(gb);
642 if (sh->rpl_modification_flag[0]) {
643 for (i = 0; i < sh->nb_refs[L0]; i++)
644 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
645 }
646
647 if (sh->slice_type == B_SLICE) {
648 sh->rpl_modification_flag[1] = get_bits1(gb);
649 if (sh->rpl_modification_flag[1] == 1)
650 for (i = 0; i < sh->nb_refs[L1]; i++)
651 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
652 }
653 }
654
655 if (sh->slice_type == B_SLICE)
656 sh->mvd_l1_zero_flag = get_bits1(gb);
657
658 if (s->ps.pps->cabac_init_present_flag)
659 sh->cabac_init_flag = get_bits1(gb);
660 else
661 sh->cabac_init_flag = 0;
662
663 sh->collocated_ref_idx = 0;
664 if (sh->slice_temporal_mvp_enabled_flag) {
665 sh->collocated_list = L0;
666 if (sh->slice_type == B_SLICE)
667 sh->collocated_list = !get_bits1(gb);
668
669 if (sh->nb_refs[sh->collocated_list] > 1) {
670 sh->collocated_ref_idx = get_ue_golomb_long(gb);
671 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
672 av_log(s->avctx, AV_LOG_ERROR,
673 "Invalid collocated_ref_idx: %d.\n",
674 sh->collocated_ref_idx);
675 return AVERROR_INVALIDDATA;
676 }
677 }
678 }
679
680 if ((s->ps.pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
681 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
682 pred_weight_table(s, gb);
683 }
684
685 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
686 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
687 av_log(s->avctx, AV_LOG_ERROR,
688 "Invalid number of merging MVP candidates: %d.\n",
689 sh->max_num_merge_cand);
690 return AVERROR_INVALIDDATA;
691 }
692 }
693
694 sh->slice_qp_delta = get_se_golomb(gb);
695
696 if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
697 sh->slice_cb_qp_offset = get_se_golomb(gb);
698 sh->slice_cr_qp_offset = get_se_golomb(gb);
699 } else {
700 sh->slice_cb_qp_offset = 0;
701 sh->slice_cr_qp_offset = 0;
702 }
703
704 if (s->ps.pps->deblocking_filter_control_present_flag) {
705 int deblocking_filter_override_flag = 0;
706
707 if (s->ps.pps->deblocking_filter_override_enabled_flag)
708 deblocking_filter_override_flag = get_bits1(gb);
709
710 if (deblocking_filter_override_flag) {
711 sh->disable_deblocking_filter_flag = get_bits1(gb);
712 if (!sh->disable_deblocking_filter_flag) {
713 sh->beta_offset = get_se_golomb(gb) * 2;
714 sh->tc_offset = get_se_golomb(gb) * 2;
715 }
716 } else {
717 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
718 sh->beta_offset = s->ps.pps->beta_offset;
719 sh->tc_offset = s->ps.pps->tc_offset;
720 }
721 } else {
722 sh->disable_deblocking_filter_flag = 0;
723 sh->beta_offset = 0;
724 sh->tc_offset = 0;
725 }
726
727 if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
728 (sh->slice_sample_adaptive_offset_flag[0] ||
729 sh->slice_sample_adaptive_offset_flag[1] ||
730 !sh->disable_deblocking_filter_flag)) {
731 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
732 } else {
733 sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
734 }
735 } else if (!s->slice_initialized) {
736 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
737 return AVERROR_INVALIDDATA;
738 }
739
740 sh->num_entry_point_offsets = 0;
741 if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
742 sh->num_entry_point_offsets = get_ue_golomb_long(gb);
743 if (sh->num_entry_point_offsets > 0) {
744 int offset_len = get_ue_golomb_long(gb) + 1;
745
746 for (i = 0; i < sh->num_entry_point_offsets; i++)
747 skip_bits(gb, offset_len);
748 }
749 }
750
751 if (s->ps.pps->slice_header_extension_present_flag) {
752 unsigned int length = get_ue_golomb_long(gb);
753 for (i = 0; i < length; i++)
754 skip_bits(gb, 8); // slice_header_extension_data_byte
755 }
756
757 // Inferred parameters
758 sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
759 if (sh->slice_qp > 51 ||
760 sh->slice_qp < -s->ps.sps->qp_bd_offset) {
761 av_log(s->avctx, AV_LOG_ERROR,
762 "The slice_qp %d is outside the valid range "
763 "[%d, 51].\n",
764 sh->slice_qp,
765 -s->ps.sps->qp_bd_offset);
766 return AVERROR_INVALIDDATA;
767 }
768
769 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
770
771 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
772 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
773 return AVERROR_INVALIDDATA;
774 }
775
776 s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
777
778 if (!s->ps.pps->cu_qp_delta_enabled_flag)
779 s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
780 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
781
782 s->slice_initialized = 1;
783
784 return 0;
785 }
786
787 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
788
789 #define SET_SAO(elem, value) \
790 do { \
791 if (!sao_merge_up_flag && !sao_merge_left_flag) \
792 sao->elem = value; \
793 else if (sao_merge_left_flag) \
794 sao->elem = CTB(s->sao, rx-1, ry).elem; \
795 else if (sao_merge_up_flag) \
796 sao->elem = CTB(s->sao, rx, ry-1).elem; \
797 else \
798 sao->elem = 0; \
799 } while (0)
800
801 static void hls_sao_param(HEVCContext *s, int rx, int ry)
802 {
803 HEVCLocalContext *lc = &s->HEVClc;
804 int sao_merge_left_flag = 0;
805 int sao_merge_up_flag = 0;
806 int shift = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
807 SAOParams *sao = &CTB(s->sao, rx, ry);
808 int c_idx, i;
809
810 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
811 s->sh.slice_sample_adaptive_offset_flag[1]) {
812 if (rx > 0) {
813 if (lc->ctb_left_flag)
814 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
815 }
816 if (ry > 0 && !sao_merge_left_flag) {
817 if (lc->ctb_up_flag)
818 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
819 }
820 }
821
822 for (c_idx = 0; c_idx < 3; c_idx++) {
823 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
824 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
825 continue;
826 }
827
828 if (c_idx == 2) {
829 sao->type_idx[2] = sao->type_idx[1];
830 sao->eo_class[2] = sao->eo_class[1];
831 } else {
832 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
833 }
834
835 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
836 continue;
837
838 for (i = 0; i < 4; i++)
839 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
840
841 if (sao->type_idx[c_idx] == SAO_BAND) {
842 for (i = 0; i < 4; i++) {
843 if (sao->offset_abs[c_idx][i]) {
844 SET_SAO(offset_sign[c_idx][i],
845 ff_hevc_sao_offset_sign_decode(s));
846 } else {
847 sao->offset_sign[c_idx][i] = 0;
848 }
849 }
850 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
851 } else if (c_idx != 2) {
852 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
853 }
854
855 // Inferred parameters
856 sao->offset_val[c_idx][0] = 0;
857 for (i = 0; i < 4; i++) {
858 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
859 if (sao->type_idx[c_idx] == SAO_EDGE) {
860 if (i > 1)
861 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
862 } else if (sao->offset_sign[c_idx][i]) {
863 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
864 }
865 }
866 }
867 }
868
869 #undef SET_SAO
870 #undef CTB
871
872 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
873 int log2_trafo_size, enum ScanType scan_idx,
874 int c_idx)
875 {
876 #define GET_COORD(offset, n) \
877 do { \
878 x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n]; \
879 y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n]; \
880 } while (0)
881 HEVCLocalContext *lc = &s->HEVClc;
882 int transform_skip_flag = 0;
883
884 int last_significant_coeff_x, last_significant_coeff_y;
885 int last_scan_pos;
886 int n_end;
887 int num_coeff = 0;
888 int greater1_ctx = 1;
889
890 int num_last_subset;
891 int x_cg_last_sig, y_cg_last_sig;
892
893 const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
894
895 ptrdiff_t stride = s->frame->linesize[c_idx];
896 int hshift = s->ps.sps->hshift[c_idx];
897 int vshift = s->ps.sps->vshift[c_idx];
898 uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride +
899 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
900 DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
901 DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
902
903 int trafo_size = 1 << log2_trafo_size;
904 int i, qp, shift, add, scale, scale_m;
905 const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
906 const uint8_t *scale_matrix;
907 uint8_t dc_scale;
908
909 // Derive QP for dequant
910 if (!lc->cu.cu_transquant_bypass_flag) {
911 static const int qp_c[] = {
912 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
913 };
914
915 static const uint8_t rem6[51 + 2 * 6 + 1] = {
916 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
917 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
918 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
919 };
920
921 static const uint8_t div6[51 + 2 * 6 + 1] = {
922 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
923 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
924 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
925 };
926 int qp_y = lc->qp_y;
927
928 if (c_idx == 0) {
929 qp = qp_y + s->ps.sps->qp_bd_offset;
930 } else {
931 int qp_i, offset;
932
933 if (c_idx == 1)
934 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
935 else
936 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
937
938 qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
939 if (qp_i < 30)
940 qp = qp_i;
941 else if (qp_i > 43)
942 qp = qp_i - 6;
943 else
944 qp = qp_c[qp_i - 30];
945
946 qp += s->ps.sps->qp_bd_offset;
947 }
948
949 shift = s->ps.sps->bit_depth + log2_trafo_size - 5;
950 add = 1 << (shift - 1);
951 scale = level_scale[rem6[qp]] << (div6[qp]);
952 scale_m = 16; // default when no custom scaling lists.
953 dc_scale = 16;
954
955 if (s->ps.sps->scaling_list_enable_flag) {
956 const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
957 &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
958 int matrix_id = lc->cu.pred_mode != MODE_INTRA;
959
960 if (log2_trafo_size != 5)
961 matrix_id = 3 * matrix_id + c_idx;
962
963 scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
964 if (log2_trafo_size >= 4)
965 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
966 }
967 }
968
969 if (s->ps.pps->transform_skip_enabled_flag &&
970 !lc->cu.cu_transquant_bypass_flag &&
971 log2_trafo_size == 2) {
972 transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
973 }
974
975 last_significant_coeff_x =
976 ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
977 last_significant_coeff_y =
978 ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
979
980 if (last_significant_coeff_x > 3) {
981 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
982 last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
983 (2 + (last_significant_coeff_x & 1)) +
984 suffix;
985 }
986
987 if (last_significant_coeff_y > 3) {
988 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
989 last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
990 (2 + (last_significant_coeff_y & 1)) +
991 suffix;
992 }
993
994 if (scan_idx == SCAN_VERT)
995 FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
996
997 x_cg_last_sig = last_significant_coeff_x >> 2;
998 y_cg_last_sig = last_significant_coeff_y >> 2;
999
1000 switch (scan_idx) {
1001 case SCAN_DIAG: {
1002 int last_x_c = last_significant_coeff_x & 3;
1003 int last_y_c = last_significant_coeff_y & 3;
1004
1005 scan_x_off = ff_hevc_diag_scan4x4_x;
1006 scan_y_off = ff_hevc_diag_scan4x4_y;
1007 num_coeff = diag_scan4x4_inv[last_y_c][last_x_c];
1008 if (trafo_size == 4) {
1009 scan_x_cg = scan_1x1;
1010 scan_y_cg = scan_1x1;
1011 } else if (trafo_size == 8) {
1012 num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1013 scan_x_cg = diag_scan2x2_x;
1014 scan_y_cg = diag_scan2x2_y;
1015 } else if (trafo_size == 16) {
1016 num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1017 scan_x_cg = ff_hevc_diag_scan4x4_x;
1018 scan_y_cg = ff_hevc_diag_scan4x4_y;
1019 } else { // trafo_size == 32
1020 num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1021 scan_x_cg = ff_hevc_diag_scan8x8_x;
1022 scan_y_cg = ff_hevc_diag_scan8x8_y;
1023 }
1024 break;
1025 }
1026 case SCAN_HORIZ:
1027 scan_x_cg = horiz_scan2x2_x;
1028 scan_y_cg = horiz_scan2x2_y;
1029 scan_x_off = horiz_scan4x4_x;
1030 scan_y_off = horiz_scan4x4_y;
1031 num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1032 break;
1033 default: //SCAN_VERT
1034 scan_x_cg = horiz_scan2x2_y;
1035 scan_y_cg = horiz_scan2x2_x;
1036 scan_x_off = horiz_scan4x4_y;
1037 scan_y_off = horiz_scan4x4_x;
1038 num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1039 break;
1040 }
1041 num_coeff++;
1042 num_last_subset = (num_coeff - 1) >> 4;
1043
1044 for (i = num_last_subset; i >= 0; i--) {
1045 int n, m;
1046 int x_cg, y_cg, x_c, y_c;
1047 int implicit_non_zero_coeff = 0;
1048 int64_t trans_coeff_level;
1049 int prev_sig = 0;
1050 int offset = i << 4;
1051
1052 uint8_t significant_coeff_flag_idx[16];
1053 uint8_t nb_significant_coeff_flag = 0;
1054
1055 x_cg = scan_x_cg[i];
1056 y_cg = scan_y_cg[i];
1057
1058 if (i < num_last_subset && i > 0) {
1059 int ctx_cg = 0;
1060 if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1061 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1062 if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1063 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1064
1065 significant_coeff_group_flag[x_cg][y_cg] =
1066 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1067 implicit_non_zero_coeff = 1;
1068 } else {
1069 significant_coeff_group_flag[x_cg][y_cg] =
1070 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1071 (x_cg == 0 && y_cg == 0));
1072 }
1073
1074 last_scan_pos = num_coeff - offset - 1;
1075
1076 if (i == num_last_subset) {
1077 n_end = last_scan_pos - 1;
1078 significant_coeff_flag_idx[0] = last_scan_pos;
1079 nb_significant_coeff_flag = 1;
1080 } else {
1081 n_end = 15;
1082 }
1083
1084 if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1085 prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1086 if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1087 prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1088
1089 for (n = n_end; n >= 0; n--) {
1090 GET_COORD(offset, n);
1091
1092 if (significant_coeff_group_flag[x_cg][y_cg] &&
1093 (n > 0 || implicit_non_zero_coeff == 0)) {
1094 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1095 log2_trafo_size,
1096 scan_idx,
1097 prev_sig) == 1) {
1098 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1099 nb_significant_coeff_flag++;
1100 implicit_non_zero_coeff = 0;
1101 }
1102 } else {
1103 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1104 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1105 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1106 nb_significant_coeff_flag++;
1107 }
1108 }
1109 }
1110
1111 n_end = nb_significant_coeff_flag;
1112
1113 if (n_end) {
1114 int first_nz_pos_in_cg = 16;
1115 int last_nz_pos_in_cg = -1;
1116 int c_rice_param = 0;
1117 int first_greater1_coeff_idx = -1;
1118 uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1119 uint16_t coeff_sign_flag;
1120 int sum_abs = 0;
1121 int sign_hidden = 0;
1122
1123 // initialize first elem of coeff_bas_level_greater1_flag
1124 int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1125
1126 if (!(i == num_last_subset) && greater1_ctx == 0)
1127 ctx_set++;
1128 greater1_ctx = 1;
1129 last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1130
1131 for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1132 int n_idx = significant_coeff_flag_idx[m];
1133 int inc = (ctx_set << 2) + greater1_ctx;
1134 coeff_abs_level_greater1_flag[n_idx] =
1135 ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1136 if (coeff_abs_level_greater1_flag[n_idx]) {
1137 greater1_ctx = 0;
1138 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1139 greater1_ctx++;
1140 }
1141
1142 if (coeff_abs_level_greater1_flag[n_idx] &&
1143 first_greater1_coeff_idx == -1)
1144 first_greater1_coeff_idx = n_idx;
1145 }
1146 first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1147 sign_hidden = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1148 !lc->cu.cu_transquant_bypass_flag;
1149
1150 if (first_greater1_coeff_idx != -1) {
1151 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1152 }
1153 if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1154 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1155 } else {
1156 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1157 }
1158
1159 for (m = 0; m < n_end; m++) {
1160 n = significant_coeff_flag_idx[m];
1161 GET_COORD(offset, n);
1162 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1163 if (trans_coeff_level == ((m < 8) ?
1164 ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1165 int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1166
1167 trans_coeff_level += last_coeff_abs_level_remaining;
1168 if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1169 c_rice_param = FFMIN(c_rice_param + 1, 4);
1170 }
1171 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1172 sum_abs += trans_coeff_level;
1173 if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1174 trans_coeff_level = -trans_coeff_level;
1175 }
1176 if (coeff_sign_flag >> 15)
1177 trans_coeff_level = -trans_coeff_level;
1178 coeff_sign_flag <<= 1;
1179 if (!lc->cu.cu_transquant_bypass_flag) {
1180 if (s->ps.sps->scaling_list_enable_flag) {
1181 if (y_c || x_c || log2_trafo_size < 4) {
1182 int pos;
1183 switch (log2_trafo_size) {
1184 case 3: pos = (y_c << 3) + x_c; break;
1185 case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1186 case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1187 default: pos = (y_c << 2) + x_c;
1188 }
1189 scale_m = scale_matrix[pos];
1190 } else {
1191 scale_m = dc_scale;
1192 }
1193 }
1194 trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1195 if(trans_coeff_level < 0) {
1196 if((~trans_coeff_level) & 0xFffffffffff8000)
1197 trans_coeff_level = -32768;
1198 } else {
1199 if (trans_coeff_level & 0xffffffffffff8000)
1200 trans_coeff_level = 32767;
1201 }
1202 }
1203 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1204 }
1205 }
1206 }
1207
1208 if (lc->cu.cu_transquant_bypass_flag) {
1209 s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1210 } else {
1211 if (transform_skip_flag)
1212 s->hevcdsp.transform_skip(dst, coeffs, stride);
1213 else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1214 log2_trafo_size == 2)
1215 s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1216 else
1217 s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1218 }
1219 }
1220
1221 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1222 int xBase, int yBase, int cb_xBase, int cb_yBase,
1223 int log2_cb_size, int log2_trafo_size,
1224 int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1225 {
1226 HEVCLocalContext *lc = &s->HEVClc;
1227
1228 if (lc->cu.pred_mode == MODE_INTRA) {
1229 int trafo_size = 1 << log2_trafo_size;
1230 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1231
1232 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1233 if (log2_trafo_size > 2) {
1234 trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1235 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1236 s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1237 s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1238 } else if (blk_idx == 3) {
1239 trafo_size = trafo_size << s->ps.sps->hshift[1];
1240 ff_hevc_set_neighbour_available(s, xBase, yBase,
1241 trafo_size, trafo_size);
1242 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1243 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1244 }
1245 }
1246
1247 if (cbf_luma || cbf_cb || cbf_cr) {
1248 int scan_idx = SCAN_DIAG;
1249 int scan_idx_c = SCAN_DIAG;
1250
1251 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1252 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1253 if (lc->tu.cu_qp_delta != 0)
1254 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1255 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1256 lc->tu.is_cu_qp_delta_coded = 1;
1257
1258 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1259 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
1260 av_log(s->avctx, AV_LOG_ERROR,
1261 "The cu_qp_delta %d is outside the valid range "
1262 "[%d, %d].\n",
1263 lc->tu.cu_qp_delta,
1264 -(26 + s->ps.sps->qp_bd_offset / 2),
1265 (25 + s->ps.sps->qp_bd_offset / 2));
1266 return AVERROR_INVALIDDATA;
1267 }
1268
1269 ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1270 }
1271
1272 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1273 if (lc->tu.cur_intra_pred_mode >= 6 &&
1274 lc->tu.cur_intra_pred_mode <= 14) {
1275 scan_idx = SCAN_VERT;
1276 } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1277 lc->tu.cur_intra_pred_mode <= 30) {
1278 scan_idx = SCAN_HORIZ;
1279 }
1280
1281 if (lc->pu.intra_pred_mode_c >= 6 &&
1282 lc->pu.intra_pred_mode_c <= 14) {
1283 scan_idx_c = SCAN_VERT;
1284 } else if (lc->pu.intra_pred_mode_c >= 22 &&
1285 lc->pu.intra_pred_mode_c <= 30) {
1286 scan_idx_c = SCAN_HORIZ;
1287 }
1288 }
1289
1290 if (cbf_luma)
1291 hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1292 if (log2_trafo_size > 2) {
1293 if (cbf_cb)
1294 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1295 if (cbf_cr)
1296 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1297 } else if (blk_idx == 3) {
1298 if (cbf_cb)
1299 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1300 if (cbf_cr)
1301 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1302 }
1303 }
1304 return 0;
1305 }
1306
1307 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1308 {
1309 int cb_size = 1 << log2_cb_size;
1310 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1311
1312 int min_pu_width = s->ps.sps->min_pu_width;
1313 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1314 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1315 int i, j;
1316
1317 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1318 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1319 s->is_pcm[i + j * min_pu_width] = 2;
1320 }
1321
1322 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1323 int xBase, int yBase, int cb_xBase, int cb_yBase,
1324 int log2_cb_size, int log2_trafo_size,
1325 int trafo_depth, int blk_idx,
1326 int cbf_cb, int cbf_cr)
1327 {
1328 HEVCLocalContext *lc = &s->HEVClc;
1329 uint8_t split_transform_flag;
1330 int ret;
1331
1332 if (lc->cu.intra_split_flag) {
1333 if (trafo_depth == 1)
1334 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1335 } else {
1336 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1337 }
1338
1339 if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1340 log2_trafo_size > s->ps.sps->log2_min_tb_size &&
1341 trafo_depth < lc->cu.max_trafo_depth &&
1342 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1343 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1344 } else {
1345 int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1346 lc->cu.pred_mode == MODE_INTER &&
1347 lc->cu.part_mode != PART_2Nx2N &&
1348 trafo_depth == 0;
1349
1350 split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1351 (lc->cu.intra_split_flag && trafo_depth == 0) ||
1352 inter_split;
1353 }
1354
1355 if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1356 cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1357 else if (log2_trafo_size > 2 || trafo_depth == 0)
1358 cbf_cb = 0;
1359 if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1360 cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1361 else if (log2_trafo_size > 2 || trafo_depth == 0)
1362 cbf_cr = 0;
1363
1364 if (split_transform_flag) {
1365 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1366 const int x1 = x0 + trafo_size_split;
1367 const int y1 = y0 + trafo_size_split;
1368
1369 #define SUBDIVIDE(x, y, idx) \
1370 do { \
1371 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1372 log2_trafo_size - 1, trafo_depth + 1, idx, \
1373 cbf_cb, cbf_cr); \
1374 if (ret < 0) \
1375 return ret; \
1376 } while (0)
1377
1378 SUBDIVIDE(x0, y0, 0);
1379 SUBDIVIDE(x1, y0, 1);
1380 SUBDIVIDE(x0, y1, 2);
1381 SUBDIVIDE(x1, y1, 3);
1382
1383 #undef SUBDIVIDE
1384 } else {
1385 int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
1386 int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1387 int min_tu_width = s->ps.sps->min_tb_width;
1388 int cbf_luma = 1;
1389
1390 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1391 cbf_cb || cbf_cr)
1392 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1393
1394 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1395 log2_cb_size, log2_trafo_size,
1396 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1397 if (ret < 0)
1398 return ret;
1399 // TODO: store cbf_luma somewhere else
1400 if (cbf_luma) {
1401 int i, j;
1402 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1403 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1404 int x_tu = (x0 + j) >> log2_min_tu_size;
1405 int y_tu = (y0 + i) >> log2_min_tu_size;
1406 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1407 }
1408 }
1409 if (!s->sh.disable_deblocking_filter_flag) {
1410 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1411 if (s->ps.pps->transquant_bypass_enable_flag &&
1412 lc->cu.cu_transquant_bypass_flag)
1413 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1414 }
1415 }
1416 return 0;
1417 }
1418
1419 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1420 {
1421 //TODO: non-4:2:0 support
1422 HEVCLocalContext *lc = &s->HEVClc;
1423 GetBitContext gb;
1424 int cb_size = 1 << log2_cb_size;
1425 int stride0 = s->frame->linesize[0];
1426 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1427 int stride1 = s->frame->linesize[1];
1428 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1429 int stride2 = s->frame->linesize[2];
1430 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1431
1432 int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1433 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1434 int ret;
1435
1436 if (!s->sh.disable_deblocking_filter_flag)
1437 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1438
1439 ret = init_get_bits(&gb, pcm, length);
1440 if (ret < 0)
1441 return ret;
1442
1443 s->hevcdsp.put_pcm(dst0, stride0, cb_size, &gb, s->ps.sps->pcm.bit_depth);
1444 s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1445 s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1446 return 0;
1447 }
1448
1449 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1450 {
1451 HEVCLocalContext *lc = &s->HEVClc;
1452 int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1453 int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1454
1455 if (x)
1456 x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1457 if (y)
1458 y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1459
1460 switch (x) {
1461 case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s); break;
1462 case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1463 case 0: lc->pu.mvd.x = 0; break;
1464 }
1465
1466 switch (y) {
1467 case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s); break;
1468 case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1469 case 0: lc->pu.mvd.y = 0; break;
1470 }
1471 }
1472
1473 /**
1474 * 8.5.3.2.2.1 Luma sample interpolation process
1475 *
1476 * @param s HEVC decoding context
1477 * @param dst target buffer for block data at block position
1478 * @param dststride stride of the dst buffer
1479 * @param ref reference picture buffer at origin (0, 0)
1480 * @param mv motion vector (relative to block position) to get pixel data from
1481 * @param x_off horizontal position of block from origin (0, 0)
1482 * @param y_off vertical position of block from origin (0, 0)
1483 * @param block_w width of block
1484 * @param block_h height of block
1485 */
1486 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1487 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1488 int block_w, int block_h, int pred_idx)
1489 {
1490 HEVCLocalContext *lc = &s->HEVClc;
1491 uint8_t *src = ref->data[0];
1492 ptrdiff_t srcstride = ref->linesize[0];
1493 int pic_width = s->ps.sps->width;
1494 int pic_height = s->ps.sps->height;
1495
1496 int mx = mv->x & 3;
1497 int my = mv->y & 3;
1498 int extra_left = ff_hevc_qpel_extra_before[mx];
1499 int extra_top = ff_hevc_qpel_extra_before[my];
1500
1501 x_off += mv->x >> 2;
1502 y_off += mv->y >> 2;
1503 src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1504
1505 if (x_off < extra_left || y_off < extra_top ||
1506 x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1507 y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1508 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1509 int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1510 int buf_offset = extra_top *
1511 edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1512
1513 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1514 edge_emu_stride, srcstride,
1515 block_w + ff_hevc_qpel_extra[mx],
1516 block_h + ff_hevc_qpel_extra[my],
1517 x_off - extra_left, y_off - extra_top,
1518 pic_width, pic_height);
1519 src = lc->edge_emu_buffer + buf_offset;
1520 srcstride = edge_emu_stride;
1521 }
1522 s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1523 block_h, mx, my, lc->mc_buffer);
1524 }
1525
1526 /**
1527 * 8.5.3.2.2.2 Chroma sample interpolation process
1528 *
1529 * @param s HEVC decoding context
1530 * @param dst1 target buffer for block data at block position (U plane)
1531 * @param dst2 target buffer for block data at block position (V plane)
1532 * @param dststride stride of the dst1 and dst2 buffers
1533 * @param ref reference picture buffer at origin (0, 0)
1534 * @param mv motion vector (relative to block position) to get pixel data from
1535 * @param x_off horizontal position of block from origin (0, 0)
1536 * @param y_off vertical position of block from origin (0, 0)
1537 * @param block_w width of block
1538 * @param block_h height of block
1539 */
1540 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1541 ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1542 int x_off, int y_off, int block_w, int block_h, int pred_idx)
1543 {
1544 HEVCLocalContext *lc = &s->HEVClc;
1545 uint8_t *src1 = ref->data[1];
1546 uint8_t *src2 = ref->data[2];
1547 ptrdiff_t src1stride = ref->linesize[1];
1548 ptrdiff_t src2stride = ref->linesize[2];
1549 int pic_width = s->ps.sps->width >> 1;
1550 int pic_height = s->ps.sps->height >> 1;
1551
1552 int mx = mv->x & 7;
1553 int my = mv->y & 7;
1554
1555 x_off += mv->x >> 3;
1556 y_off += mv->y >> 3;
1557 src1 += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1558 src2 += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1559
1560 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1561 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1562 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1563 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1564 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1565 int buf_offset1 = EPEL_EXTRA_BEFORE *
1566 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1567 int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1568 int buf_offset2 = EPEL_EXTRA_BEFORE *
1569 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1570
1571 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1572 edge_emu_stride, src1stride,
1573 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1574 x_off - EPEL_EXTRA_BEFORE,
1575 y_off - EPEL_EXTRA_BEFORE,
1576 pic_width, pic_height);
1577
1578 src1 = lc->edge_emu_buffer + buf_offset1;
1579 src1stride = edge_emu_stride;
1580 s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1581 block_h, mx, my, lc->mc_buffer);
1582
1583 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1584 edge_emu_stride, src2stride,
1585 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1586 x_off - EPEL_EXTRA_BEFORE,
1587 y_off - EPEL_EXTRA_BEFORE,
1588 pic_width, pic_height);
1589 src2 = lc->edge_emu_buffer + buf_offset2;
1590 src2stride = edge_emu_stride;
1591
1592 s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1593 block_h, mx, my, lc->mc_buffer);
1594 } else {
1595 s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1596 block_h, mx, my, lc->mc_buffer);
1597 s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1598 block_h, mx, my, lc->mc_buffer);
1599 }
1600 }
1601
1602 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1603 const Mv *mv, int y0, int height)
1604 {
1605 int y = (mv->y >> 2) + y0 + height + 9;
1606 ff_thread_await_progress(&ref->tf, y, 0);
1607 }
1608
1609 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1610 int nPbH, int log2_cb_size, int part_idx,
1611 int merge_idx, MvField *mv)
1612 {
1613 HEVCLocalContext *lc = &s->HEVClc;
1614 enum InterPredIdc inter_pred_idc = PRED_L0;
1615 int mvp_flag;
1616
1617 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1618 if (s->sh.slice_type == B_SLICE)
1619 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1620
1621 if (inter_pred_idc != PRED_L1) {
1622 if (s->sh.nb_refs[L0])
1623 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1624
1625 mv->pred_flag[0] = 1;
1626 hls_mvd_coding(s, x0, y0, 0);
1627 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1628 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1629 part_idx, merge_idx, mv, mvp_flag, 0);
1630 mv->mv[0].x += lc->pu.mvd.x;
1631 mv->mv[0].y += lc->pu.mvd.y;
1632 }
1633
1634 if (inter_pred_idc != PRED_L0) {
1635 if (s->sh.nb_refs[L1])
1636 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1637
1638 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1639 AV_ZERO32(&lc->pu.mvd);
1640 } else {
1641 hls_mvd_coding(s, x0, y0, 1);
1642 }
1643
1644 mv->pred_flag[1] = 1;
1645 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1646 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1647 part_idx, merge_idx, mv, mvp_flag, 1);
1648 mv->mv[1].x += lc->pu.mvd.x;
1649 mv->mv[1].y += lc->pu.mvd.y;
1650 }
1651 }
1652
1653 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1654 int nPbW, int nPbH,
1655 int log2_cb_size, int partIdx)
1656 {
1657 static const int pred_indices[] = {
1658 [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1659 };
1660 const int pred_idx = pred_indices[nPbW];
1661
1662 #define POS(c_idx, x, y) \
1663 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1664 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1665 HEVCLocalContext *lc = &s->HEVClc;
1666 int merge_idx = 0;
1667 struct MvField current_mv = {{{ 0 }}};
1668
1669 int min_pu_width = s->ps.sps->min_pu_width;
1670
1671 MvField *tab_mvf = s->ref->tab_mvf;
1672 RefPicList *refPicList = s->ref->refPicList;
1673 HEVCFrame *ref0, *ref1;
1674
1675 int tmpstride = MAX_PB_SIZE * sizeof(int16_t);
1676
1677 uint8_t *dst0 = POS(0, x0, y0);
1678 uint8_t *dst1 = POS(1, x0, y0);
1679 uint8_t *dst2 = POS(2, x0, y0);
1680 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1681 int min_cb_width = s->ps.sps->min_cb_width;
1682 int x_cb = x0 >> log2_min_cb_size;
1683 int y_cb = y0 >> log2_min_cb_size;
1684 int x_pu, y_pu;
1685 int i, j;
1686
1687 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1688
1689 if (!skip_flag)
1690 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1691
1692 if (skip_flag || lc->pu.merge_flag) {
1693 if (s->sh.max_num_merge_cand > 1)
1694 merge_idx = ff_hevc_merge_idx_decode(s);
1695 else
1696 merge_idx = 0;
1697
1698 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1699 partIdx, merge_idx, &current_mv);
1700 } else {
1701 hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1702 partIdx, merge_idx, &current_mv);
1703 }
1704
1705 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1706 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1707
1708 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1709 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1710 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1711
1712 if (current_mv.pred_flag[0]) {
1713 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1714 if (!ref0)
1715 return;
1716 hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1717 }
1718 if (current_mv.pred_flag[1]) {
1719 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1720 if (!ref1)
1721 return;
1722 hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1723 }
1724
1725 if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1726 DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1727 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1728
1729 luma_mc(s, tmp, tmpstride, ref0->frame,
1730 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1731
1732 if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1733 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1734 s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1735 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1736 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1737 dst0, s->frame->linesize[0], tmp,
1738 tmpstride, nPbH);
1739 } else {
1740 s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1741 }
1742 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1743 &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1744
1745 if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1746 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1747 s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1748 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1749 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1750 dst1, s->frame->linesize[1], tmp, tmpstride,
1751 nPbH / 2);
1752 s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1753 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1754 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1755 dst2, s->frame->linesize[2], tmp2, tmpstride,
1756 nPbH / 2);
1757 } else {
1758 s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmpstride, nPbH / 2);
1759 s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1760 }
1761 } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1762 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1763 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1764
1765 luma_mc(s, tmp, tmpstride, ref1->frame,
1766 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1767
1768 if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1769 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1770 s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1771 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1772 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1773 dst0, s->frame->linesize[0], tmp, tmpstride,
1774 nPbH);
1775 } else {
1776 s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1777 }
1778
1779 chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1780 &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1781
1782 if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1783 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1784 s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1785 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1786 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1787 dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
1788 s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1789 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1790 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1791 dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
1792 } else {
1793 s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmpstride, nPbH / 2);
1794 s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1795 }
1796 } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1797 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1798 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1799 DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1800 DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1801
1802 luma_mc(s, tmp, tmpstride, ref0->frame,
1803 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1804 luma_mc(s, tmp2, tmpstride, ref1->frame,
1805 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1806
1807 if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1808 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1809 s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
1810 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1811 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1812 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1813 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1814 dst0, s->frame->linesize[0],
1815 tmp, tmp2, tmpstride, nPbH);
1816 } else {
1817 s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
1818 tmp, tmp2, tmpstride, nPbH);
1819 }
1820
1821 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1822 &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1823 chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1824 &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1825
1826 if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1827 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1828 s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1829 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1830 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1831 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1832 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1833 dst1, s->frame->linesize[1], tmp, tmp3,
1834 tmpstride, nPbH / 2);
1835 s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1836 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1837 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1838 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1839 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1840 dst2, s->frame->linesize[2], tmp2, tmp4,
1841 tmpstride, nPbH / 2);
1842 } else {
1843 s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbH/2);
1844 s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
1845 }
1846 }
1847 }
1848
1849 /**
1850 * 8.4.1
1851 */
1852 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1853 int prev_intra_luma_pred_flag)
1854 {
1855 HEVCLocalContext *lc = &s->HEVClc;
1856 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1857 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1858 int min_pu_width = s->ps.sps->min_pu_width;
1859 int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
1860 int x0b = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1861 int y0b = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1862
1863 int cand_up = (lc->ctb_up_flag || y0b) ?
1864 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1865 int cand_left = (lc->ctb_left_flag || x0b) ?
1866 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1867
1868 int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1869
1870 MvField *tab_mvf = s->ref->tab_mvf;
1871 int intra_pred_mode;
1872 int candidate[3];
1873 int i, j;
1874
1875 // intra_pred_mode prediction does not cross vertical CTB boundaries
1876 if ((y0 - 1) < y_ctb)
1877 cand_up = INTRA_DC;
1878
1879 if (cand_left == cand_up) {
1880 if (cand_left < 2) {
1881 candidate[0] = INTRA_PLANAR;
1882 candidate[1] = INTRA_DC;
1883 candidate[2] = INTRA_ANGULAR_26;
1884 } else {
1885 candidate[0] = cand_left;
1886 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1887 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1888 }
1889 } else {
1890 candidate[0] = cand_left;
1891 candidate[1] = cand_up;
1892 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1893 candidate[2] = INTRA_PLANAR;
1894 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1895 candidate[2] = INTRA_DC;
1896 } else {
1897 candidate[2] = INTRA_ANGULAR_26;
1898 }
1899 }
1900
1901 if (prev_intra_luma_pred_flag) {
1902 intra_pred_mode = candidate[lc->pu.mpm_idx];
1903 } else {
1904 if (candidate[0] > candidate[1])
1905 FFSWAP(uint8_t, candidate[0], candidate[1]);
1906 if (candidate[0] > candidate[2])
1907 FFSWAP(uint8_t, candidate[0], candidate[2]);
1908 if (candidate[1] > candidate[2])
1909 FFSWAP(uint8_t, candidate[1], candidate[2]);
1910
1911 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1912 for (i = 0; i < 3; i++)
1913 if (intra_pred_mode >= candidate[i])
1914 intra_pred_mode++;
1915 }
1916
1917 /* write the intra prediction units into the mv array */
1918 if (!size_in_pus)
1919 size_in_pus = 1;
1920 for (i = 0; i < size_in_pus; i++) {
1921 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1922 intra_pred_mode, size_in_pus);
1923
1924 for (j = 0; j < size_in_pus; j++) {
1925 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra = 1;
1926 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1927 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1928 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0] = 0;
1929 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1] = 0;
1930 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x = 0;
1931 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y = 0;
1932 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x = 0;
1933 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y = 0;
1934 }
1935 }
1936
1937 return intra_pred_mode;
1938 }
1939
1940 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1941 int log2_cb_size, int ct_depth)
1942 {
1943 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1944 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1945 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
1946 int y;
1947
1948 for (y = 0; y < length; y++)
1949 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1950 ct_depth, length);
1951 }
1952
1953 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1954 int log2_cb_size)
1955 {
1956 HEVCLocalContext *lc = &s->HEVClc;
1957 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1958 uint8_t prev_intra_luma_pred_flag[4];
1959 int split = lc->cu.part_mode == PART_NxN;
1960 int pb_size = (1 << log2_cb_size) >> split;
1961 int side = split + 1;
1962 int chroma_mode;
1963 int i, j;
1964
1965 for (i = 0; i < side; i++)
1966 for (j = 0; j < side; j++)
1967 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1968
1969 for (i = 0; i < side; i++) {
1970 for (j = 0; j < side; j++) {
1971 if (prev_intra_luma_pred_flag[2 * i + j])
1972 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1973 else
1974 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1975
1976 lc->pu.intra_pred_mode[2 * i + j] =
1977 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1978 prev_intra_luma_pred_flag[2 * i + j]);
1979 }
1980 }
1981
1982 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1983 if (chroma_mode != 4) {
1984 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1985 lc->pu.intra_pred_mode_c = 34;
1986 else
1987 lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1988 } else {
1989 lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1990 }
1991 }
1992
1993 static void intra_prediction_unit_default_value(HEVCContext *s,
1994 int x0, int y0,
1995 int log2_cb_size)
1996 {
1997 HEVCLocalContext *lc = &s->HEVClc;
1998 int pb_size = 1 << log2_cb_size;
1999 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
2000 int min_pu_width = s->ps.sps->min_pu_width;
2001 MvField *tab_mvf = s->ref->tab_mvf;
2002 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
2003 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
2004 int j, k;
2005
2006 if (size_in_pus == 0)
2007 size_in_pus = 1;
2008 for (j = 0; j < size_in_pus; j++) {
2009 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2010 for (k = 0; k < size_in_pus; k++)
2011 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2012 }
2013 }
2014
2015 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2016 {
2017 int cb_size = 1 << log2_cb_size;
2018 HEVCLocalContext *lc = &s->HEVClc;
2019 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2020 int length = cb_size >> log2_min_cb_size;
2021 int min_cb_width = s->ps.sps->min_cb_width;
2022 int x_cb = x0 >> log2_min_cb_size;
2023 int y_cb = y0 >> log2_min_cb_size;
2024 int x, y, ret;
2025
2026 lc->cu.x = x0;
2027 lc->cu.y = y0;
2028 lc->cu.pred_mode = MODE_INTRA;
2029 lc->cu.part_mode = PART_2Nx2N;
2030 lc->cu.intra_split_flag = 0;
2031
2032 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2033 for (x = 0; x < 4; x++)
2034 lc->pu.intra_pred_mode[x] = 1;
2035 if (s->ps.pps->transquant_bypass_enable_flag) {
2036 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2037 if (lc->cu.cu_transquant_bypass_flag)
2038 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2039 } else
2040 lc->cu.cu_transquant_bypass_flag = 0;
2041
2042 if (s->sh.slice_type != I_SLICE) {
2043 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2044
2045 x = y_cb * min_cb_width + x_cb;
2046 for (y = 0; y < length; y++) {
2047 memset(&s->skip_flag[x], skip_flag, length);
2048 x += min_cb_width;
2049 }
2050 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2051 }
2052
2053 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2054 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2055 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2056
2057 if (!s->sh.disable_deblocking_filter_flag)
2058 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2059 } else {
2060 int pcm_flag = 0;
2061
2062 if (s->sh.slice_type != I_SLICE)
2063 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2064 if (lc->cu.pred_mode != MODE_INTRA ||
2065 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2066 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2067 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2068 lc->cu.pred_mode == MODE_INTRA;
2069 }
2070
2071 if (lc->cu.pred_mode == MODE_INTRA) {
2072 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2073 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2074 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2075 pcm_flag = ff_hevc_pcm_flag_decode(s);
2076 }
2077 if (pcm_flag) {
2078 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2079 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2080 if (s->ps.sps->pcm.loop_filter_disable_flag)
2081 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2082
2083 if (ret < 0)
2084 return ret;
2085 } else {
2086 intra_prediction_unit(s, x0, y0, log2_cb_size);
2087 }
2088 } else {
2089 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2090 switch (lc->cu.part_mode) {
2091 case PART_2Nx2N:
2092 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2093 break;
2094 case PART_2NxN:
2095 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0);
2096 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2097 break;
2098 case PART_Nx2N:
2099 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0);
2100 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2101 break;
2102 case PART_2NxnU:
2103 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0);
2104 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2105 break;
2106 case PART_2NxnD:
2107 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2108 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1);
2109 break;
2110 case PART_nLx2N:
2111 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0);
2112 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2113 break;
2114 case PART_nRx2N:
2115 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2116 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1);
2117 break;
2118 case PART_NxN:
2119 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0);
2120 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1);
2121 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2122 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2123 break;
2124 }
2125 }
2126
2127 if (!pcm_flag) {
2128 int rqt_root_cbf = 1;
2129
2130 if (lc->cu.pred_mode != MODE_INTRA &&
2131 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2132 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2133 }
2134 if (rqt_root_cbf) {
2135 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2136 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2137 s->ps.sps->max_transform_hierarchy_depth_inter;
2138 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2139 log2_cb_size,
2140 log2_cb_size, 0, 0, 0, 0);
2141 if (ret < 0)
2142 return ret;
2143 } else {
2144 if (!s->sh.disable_deblocking_filter_flag)
2145 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2146 }
2147 }
2148 }
2149
2150 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2151 ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2152
2153 x = y_cb * min_cb_width + x_cb;
2154 for (y = 0; y < length; y++) {
2155 memset(&s->qp_y_tab[x], lc->qp_y, length);
2156 x += min_cb_width;
2157 }
2158
2159 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2160
2161 return 0;
2162 }
2163
2164 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2165 int log2_cb_size, int cb_depth)
2166 {
2167 HEVCLocalContext *lc = &s->HEVClc;
2168 const int cb_size = 1 << log2_cb_size;
2169 int split_cu;
2170
2171 lc->ct.depth = cb_depth;
2172 if (x0 + cb_size <= s->ps.sps->width &&
2173 y0 + cb_size <= s->ps.sps->height &&
2174 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2175 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2176 } else {
2177 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2178 }
2179 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2180 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2181 lc->tu.is_cu_qp_delta_coded = 0;
2182 lc->tu.cu_qp_delta = 0;
2183 }
2184
2185 if (split_cu) {
2186 const int cb_size_split = cb_size >> 1;
2187 const int x1 = x0 + cb_size_split;
2188 const int y1 = y0 + cb_size_split;
2189
2190 log2_cb_size--;
2191 cb_depth++;
2192
2193 #define SUBDIVIDE(x, y) \
2194 do { \
2195 if (x < s->ps.sps->width && y < s->ps.sps->height) { \
2196 int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2197 if (ret < 0) \
2198 return ret; \
2199 } \
2200 } while (0)
2201
2202 SUBDIVIDE(x0, y0);
2203 SUBDIVIDE(x1, y0);
2204 SUBDIVIDE(x0, y1);
2205 SUBDIVIDE(x1, y1);
2206 } else {
2207 int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2208 if (ret < 0)
2209 return ret;
2210 }
2211
2212 return 0;
2213 }
2214
2215 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2216 int ctb_addr_ts)
2217 {
2218 HEVCLocalContext *lc = &s->HEVClc;
2219 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2220 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2221 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2222
2223 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2224
2225 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2226 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2227 lc->first_qp_group = 1;
2228 lc->end_of_tiles_x = s->ps.sps->width;
2229 } else if (s->ps.pps->tiles_enabled_flag) {
2230 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2231 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2232 lc->start_of_tiles_x = x_ctb;
2233 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2234 lc->first_qp_group = 1;
2235 }
2236 } else {
2237 lc->end_of_tiles_x = s->ps.sps->width;
2238 }
2239
2240 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2241
2242 lc->boundary_flags = 0;
2243 if (s->ps.pps->tiles_enabled_flag) {
2244 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2245 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2246 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2247 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2248 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2249 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2250 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2251 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2252 } else {
2253 if (!ctb_addr_in_slice)
2254 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2255 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2256 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2257 }
2258
2259 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2260 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2261 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2262 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2263 }
2264
2265 static int hls_slice_data(HEVCContext *s)
2266 {
2267 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2268 int more_data = 1;
2269 int x_ctb = 0;
2270 int y_ctb = 0;
2271 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2272 int ret;
2273
2274 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2275 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2276
2277 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2278 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2279 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2280
2281 ff_hevc_cabac_init(s, ctb_addr_ts);
2282
2283 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2284
2285 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2286 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2287 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2288
2289 ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2290 if (ret < 0)
2291 return ret;
2292 more_data = !ff_hevc_end_of_slice_flag_decode(s);
2293
2294 ctb_addr_ts++;
2295 ff_hevc_save_states(s, ctb_addr_ts);
2296 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2297 }
2298
2299 if (x_ctb + ctb_size >= s->ps.sps->width &&
2300 y_ctb + ctb_size >= s->ps.sps->height)
2301 ff_hevc_hls_filter(s, x_ctb, y_ctb);
2302
2303 return ctb_addr_ts;
2304 }
2305
2306 static void restore_tqb_pixels(HEVCContext *s)
2307 {
2308 int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2309 int x, y, c_idx;
2310
2311 for (c_idx = 0; c_idx < 3; c_idx++) {
2312 ptrdiff_t stride = s->frame->linesize[c_idx];
2313 int hshift = s->ps.sps->hshift[c_idx];
2314 int vshift = s->ps.sps->vshift[c_idx];
2315 for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2316 for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2317 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2318 int n;
2319 int len = min_pu_size >> hshift;
2320 uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2321 uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2322 for (n = 0; n < (min_pu_size >> vshift); n++) {
2323 memcpy(dst, src, len);
2324 src += stride;
2325 dst += stride;
2326 }
2327 }
2328 }
2329 }
2330 }
2331 }
2332
2333 static int set_side_data(HEVCContext *s)
2334 {
2335 AVFrame *out = s->ref->frame;
2336
2337 if (s->sei_frame_packing_present &&
2338 s->frame_packing_arrangement_type >= 3 &&
2339 s->frame_packing_arrangement_type <= 5 &&
2340 s->content_interpretation_type > 0 &&
2341 s->content_interpretation_type < 3) {
2342 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2343 if (!stereo)
2344 return AVERROR(ENOMEM);
2345
2346 switch (s->frame_packing_arrangement_type) {
2347 case 3:
2348 if (s->quincunx_subsampling)
2349 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2350 else
2351 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2352 break;
2353 case 4:
2354 stereo->type = AV_STEREO3D_TOPBOTTOM;
2355 break;
2356 case 5:
2357 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2358 break;
2359 }
2360
2361 if (s->content_interpretation_type == 2)
2362 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2363 }
2364
2365 if (s->sei_display_orientation_present &&
2366 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2367 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2368 AVFrameSideData *rotation = av_frame_new_side_data(out,
2369 AV_FRAME_DATA_DISPLAYMATRIX,
2370 sizeof(int32_t) * 9);
2371 if (!rotation)
2372 return AVERROR(ENOMEM);
2373
2374 av_display_rotation_set((int32_t *)rotation->data, angle);
2375 av_display_matrix_flip((int32_t *)rotation->data,
2376 s->sei_hflip, s->sei_vflip);
2377 }
2378
2379 return 0;
2380 }
2381
2382 static int hevc_frame_start(HEVCContext *s)
2383 {
2384 HEVCLocalContext *lc = &s->HEVClc;
2385 int ret;
2386
2387 memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2388 memset(s->vertical_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2389 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2390 memset(s->is_pcm, 0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2391
2392 lc->start_of_tiles_x = 0;
2393 s->is_decoded = 0;
2394 s->first_nal_type = s->nal_unit_type;
2395
2396 if (s->ps.pps->tiles_enabled_flag)
2397 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2398
2399 ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2400 s->poc);
2401 if (ret < 0)
2402 goto fail;
2403
2404 ret = ff_hevc_frame_rps(s);
2405 if (ret < 0) {
2406 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2407 goto fail;
2408 }
2409
2410 s->ref->frame->key_frame = IS_IRAP(s);
2411
2412 ret = set_side_data(s);
2413 if (ret < 0)
2414 goto fail;
2415
2416 av_frame_unref(s->output_frame);
2417 ret = ff_hevc_output_frame(s, s->output_frame, 0);
2418 if (ret < 0)
2419 goto fail;
2420
2421 ff_thread_finish_setup(s->avctx);
2422
2423 return 0;
2424
2425 fail:
2426 if (s->ref)
2427 ff_hevc_unref_frame(s, s->ref, ~0);
2428 s->ref = NULL;
2429 return ret;
2430 }
2431
2432 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2433 {
2434 HEVCLocalContext *lc = &s->HEVClc;
2435 GetBitContext *gb = &lc->gb;
2436 int ctb_addr_ts, ret;
2437
2438 *gb = nal->gb;
2439 s->nal_unit_type = nal->type;
2440 s->temporal_id = nal->temporal_id;
2441
2442 switch (s->nal_unit_type) {
2443 case NAL_VPS:
2444 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2445 if (ret < 0)
2446 goto fail;
2447 break;
2448 case NAL_SPS:
2449 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2450 s->apply_defdispwin);
2451 if (ret < 0)
2452 goto fail;
2453 break;
2454 case NAL_PPS:
2455 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2456 if (ret < 0)
2457 goto fail;
2458 break;
2459 case NAL_SEI_PREFIX:
2460 case NAL_SEI_SUFFIX:
2461 ret = ff_hevc_decode_nal_sei(s);
2462 if (ret < 0)
2463 goto fail;
2464 break;
2465 case NAL_TRAIL_R:
2466 case NAL_TRAIL_N:
2467 case NAL_TSA_N:
2468 case NAL_TSA_R:
2469 case NAL_STSA_N:
2470 case NAL_STSA_R:
2471 case NAL_BLA_W_LP:
2472 case NAL_BLA_W_RADL:
2473 case NAL_BLA_N_LP:
2474 case NAL_IDR_W_RADL:
2475 case NAL_IDR_N_LP:
2476 case NAL_CRA_NUT:
2477 case NAL_RADL_N:
2478 case NAL_RADL_R:
2479 case NAL_RASL_N:
2480 case NAL_RASL_R:
2481 ret = hls_slice_header(s);
2482 if (ret < 0)
2483 return ret;
2484
2485 if (s->max_ra == INT_MAX) {
2486 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2487 s->max_ra = s->poc;
2488 } else {
2489 if (IS_IDR(s))
2490 s->max_ra = INT_MIN;
2491 }
2492 }
2493
2494 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2495 s->poc <= s->max_ra) {
2496 s->is_decoded = 0;
2497 break;
2498 } else {
2499 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2500 s->max_ra = INT_MIN;
2501 }
2502
2503 if (s->sh.first_slice_in_pic_flag) {
2504 ret = hevc_frame_start(s);
2505 if (ret < 0)
2506 return ret;
2507 } else if (!s->ref) {
2508 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2509 goto fail;
2510 }
2511
2512 if (s->nal_unit_type != s->first_nal_type) {
2513 av_log(s->avctx, AV_LOG_ERROR,
2514 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2515 s->first_nal_type, s->nal_unit_type);
2516 return AVERROR_INVALIDDATA;
2517 }
2518
2519 if (!s->sh.dependent_slice_segment_flag &&
2520 s->sh.slice_type != I_SLICE) {
2521 ret = ff_hevc_slice_rpl(s);
2522 if (ret < 0) {
2523 av_log(s->avctx, AV_LOG_WARNING,
2524 "Error constructing the reference lists for the current slice.\n");
2525 goto fail;
2526 }
2527 }
2528
2529 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2530 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2531 if (ret < 0)
2532 goto fail;
2533 }
2534
2535 if (s->avctx->hwaccel) {
2536 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2537 if (ret < 0)
2538 goto fail;
2539 } else {
2540 ctb_addr_ts = hls_slice_data(s);
2541 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2542 s->is_decoded = 1;
2543 if ((s->ps.pps->transquant_bypass_enable_flag ||
2544 (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2545 s->ps.sps->sao_enabled)
2546 restore_tqb_pixels(s);
2547 }
2548
2549 if (ctb_addr_ts < 0) {
2550 ret = ctb_addr_ts;
2551 goto fail;
2552 }
2553 }
2554 break;
2555 case NAL_EOS_NUT:
2556 case NAL_EOB_NUT:
2557 s->seq_decode = (s->seq_decode + 1) & 0xff;
2558 s->max_ra = INT_MAX;
2559 break;
2560 case NAL_AUD:
2561 case NAL_FD_NUT:
2562 break;
2563 default:
2564 av_log(s->avctx, AV_LOG_INFO,
2565 "Skipping NAL unit %d\n", s->nal_unit_type);
2566 }
2567
2568 return 0;
2569 fail:
2570 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2571 return ret;
2572 return 0;
2573 }
2574
2575 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2576 {
2577 int i, ret = 0;
2578
2579 s->ref = NULL;
2580 s->eos = 0;
2581
2582 /* split the input packet into NAL units, so we know the upper bound on the
2583 * number of slices in the frame */
2584 ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
2585 s->nal_length_size, s->avctx->codec_id);
2586 if (ret < 0) {
2587 av_log(s->avctx, AV_LOG_ERROR,
2588 "Error splitting the input into NAL units.\n");
2589 return ret;
2590 }
2591
2592 for (i = 0; i < s->pkt.nb_nals; i++) {
2593 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2594 s->pkt.nals[i].type == NAL_EOS_NUT)
2595 s->eos = 1;
2596 }
2597
2598 /* decode the NAL units */
2599 for (i = 0; i < s->pkt.nb_nals; i++) {
2600 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2601 if (ret < 0) {
2602 av_log(s->avctx, AV_LOG_WARNING,
2603 "Error parsing NAL unit #%d.\n", i);
2604 goto fail;
2605 }
2606 }
2607
2608 fail:
2609 if (s->ref)
2610 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2611
2612 return ret;
2613 }
2614
/* Log the 16 bytes of an MD5 digest as two lowercase hex digits each. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2621
2622 static int verify_md5(HEVCContext *s, AVFrame *frame)
2623 {
2624 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2625 int pixel_shift;
2626 int i, j;
2627
2628 if (!desc)
2629 return AVERROR(EINVAL);
2630
2631 pixel_shift = desc->comp[0].depth > 8;
2632
2633 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2634 s->poc);
2635
2636 /* the checksums are LE, so we have to byteswap for >8bpp formats
2637 * on BE arches */
2638 #if HAVE_BIGENDIAN
2639 if (pixel_shift && !s->checksum_buf) {
2640 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2641 FFMAX3(frame->linesize[0], frame->linesize[1],
2642 frame->linesize[2]));
2643 if (!s->checksum_buf)
2644 return AVERROR(ENOMEM);
2645 }
2646 #endif
2647
2648 for (i = 0; frame->data[i]; i++) {
2649 int width = s->avctx->coded_width;
2650 int height = s->avctx->coded_height;
2651 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2652 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2653 uint8_t md5[16];
2654
2655 av_md5_init(s->md5_ctx);
2656 for (j = 0; j < h; j++) {
2657 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2658 #if HAVE_BIGENDIAN
2659 if (pixel_shift) {
2660 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2661 (const uint16_t *) src, w);
2662 src = s->checksum_buf;
2663 }
2664 #endif
2665 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2666 }
2667 av_md5_final(s->md5_ctx, md5);
2668
2669 if (!memcmp(md5, s->md5[i], 16)) {
2670 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2671 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2672 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2673 } else {
2674 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2675 print_md5(s->avctx, AV_LOG_ERROR, md5);
2676 av_log (s->avctx, AV_LOG_ERROR, " != ");
2677 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2678 av_log (s->avctx, AV_LOG_ERROR, "\n");
2679 return AVERROR_INVALIDDATA;
2680 }
2681 }
2682
2683 av_log(s->avctx, AV_LOG_DEBUG, "\n");
2684
2685 return 0;
2686 }
2687
2688 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2689 AVPacket *avpkt)
2690 {
2691 int ret;
2692 HEVCContext *s = avctx->priv_data;
2693
2694 if (!avpkt->size) {
2695 ret = ff_hevc_output_frame(s, data, 1);
2696 if (ret < 0)
2697 return ret;
2698
2699 *got_output = ret;
2700 return 0;
2701 }
2702
2703 s->ref = NULL;
2704 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2705 if (ret < 0)
2706 return ret;
2707
2708 if (avctx->hwaccel) {
2709 if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2710 av_log(avctx, AV_LOG_ERROR,
2711 "hardware accelerator failed to decode picture\n");
2712 } else {
2713 /* verify the SEI checksum */
2714 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2715 s->is_md5) {
2716 ret = verify_md5(s, s->ref->frame);
2717 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2718 ff_hevc_unref_frame(s, s->ref, ~0);
2719 return ret;
2720 }
2721 }
2722 }
2723 s->is_md5 = 0;
2724
2725 if (s->is_decoded) {
2726 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2727 s->is_decoded = 0;
2728 }
2729
2730 if (s->output_frame->buf[0]) {
2731 av_frame_move_ref(data, s->output_frame);
2732 *got_output = 1;
2733 }
2734
2735 return avpkt->size;
2736 }
2737
2738 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2739 {
2740 int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2741 if (ret < 0)
2742 return ret;
2743
2744 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2745 if (!dst->tab_mvf_buf)
2746 goto fail;
2747 dst->tab_mvf = src->tab_mvf;
2748
2749 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2750 if (!dst->rpl_tab_buf)
2751 goto fail;
2752 dst->rpl_tab = src->rpl_tab;
2753
2754 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2755 if (!dst->rpl_buf)
2756 goto fail;
2757
2758 dst->poc = src->poc;
2759 dst->ctb_count = src->ctb_count;
2760 dst->window = src->window;
2761 dst->flags = src->flags;
2762 dst->sequence = src->sequence;
2763
2764 if (src->hwaccel_picture_private) {
2765 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2766 if (!dst->hwaccel_priv_buf)
2767 goto fail;
2768 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2769 }
2770
2771 return 0;
2772 fail:
2773 ff_hevc_unref_frame(s, dst, ~0);
2774 return AVERROR(ENOMEM);
2775 }
2776
2777 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2778 {
2779 HEVCContext *s = avctx->priv_data;
2780 int i;
2781
2782 pic_arrays_free(s);
2783
2784 av_freep(&s->md5_ctx);
2785
2786 av_frame_free(&s->tmp_frame);
2787 av_frame_free(&s->output_frame);
2788
2789 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2790 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2791 av_frame_free(&s->DPB[i].frame);
2792 }
2793
2794 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2795 av_buffer_unref(&s->ps.vps_list[i]);
2796 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2797 av_buffer_unref(&s->ps.sps_list[i]);
2798 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2799 av_buffer_unref(&s->ps.pps_list[i]);
2800
2801 ff_h2645_packet_uninit(&s->pkt);
2802
2803 return 0;
2804 }
2805
2806 static av_cold int hevc_init_context(AVCodecContext *avctx)
2807 {
2808 HEVCContext *s = avctx->priv_data;
2809 int i;
2810
2811 s->avctx = avctx;
2812
2813 s->tmp_frame = av_frame_alloc();
2814 if (!s->tmp_frame)
2815 goto fail;
2816
2817 s->output_frame = av_frame_alloc();
2818 if (!s->output_frame)
2819 goto fail;
2820
2821 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2822 s->DPB[i].frame = av_frame_alloc();
2823 if (!s->DPB[i].frame)
2824 goto fail;
2825 s->DPB[i].tf.f = s->DPB[i].frame;
2826 }
2827
2828 s->max_ra = INT_MAX;
2829
2830 s->md5_ctx = av_md5_alloc();
2831 if (!s->md5_ctx)
2832 goto fail;
2833
2834 ff_bswapdsp_init(&s->bdsp);
2835
2836 s->context_initialized = 1;
2837
2838 return 0;
2839
2840 fail:
2841 hevc_decode_free(avctx);
2842 return AVERROR(ENOMEM);
2843 }