/*
 * DXVA2 HEVC HW acceleration.
 *
 * copyright (c) 2014 - 2015 Hendrik Leppkes
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

23 #include "libavutil/avassert.h"
25 #include "hevc_data.h"
28 // The headers above may include w32threads.h, which uses the original
29 // _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
30 // potentially newer version.
31 #include "dxva2_internal.h"
33 #define MAX_SLICES 256
35 struct hevc_dxva2_picture_context
{
36 DXVA_PicParams_HEVC pp
;
39 DXVA_Slice_HEVC_Short slice_short
[MAX_SLICES
];
40 const uint8_t *bitstream
;
41 unsigned bitstream_size
;
44 static void fill_picture_entry(DXVA_PicEntry_HEVC
*pic
,
45 unsigned index
, unsigned flag
)
47 av_assert0((index
& 0x7f) == index
&& (flag
& 0x01) == flag
);
48 pic
->bPicEntry
= index
| (flag
<< 7);
51 static int get_refpic_index(const DXVA_PicParams_HEVC
*pp
, int surface_index
)
54 for (i
= 0; i
< FF_ARRAY_ELEMS(pp
->RefPicList
); i
++) {
55 if ((pp
->RefPicList
[i
].bPicEntry
& 0x7f) == surface_index
)
61 static void fill_picture_parameters(const AVCodecContext
*avctx
, AVDXVAContext
*ctx
, const HEVCContext
*h
,
62 DXVA_PicParams_HEVC
*pp
)
64 const HEVCFrame
*current_picture
= h
->ref
;
65 const HEVCSPS
*sps
= h
->ps
.sps
;
66 const HEVCPPS
*pps
= h
->ps
.pps
;
69 memset(pp
, 0, sizeof(*pp
));
71 pp
->PicWidthInMinCbsY
= sps
->min_cb_width
;
72 pp
->PicHeightInMinCbsY
= sps
->min_cb_height
;
74 pp
->wFormatAndSequenceInfoFlags
= (sps
->chroma_format_idc
<< 0) |
75 (sps
->separate_colour_plane_flag
<< 2) |
76 ((sps
->bit_depth
- 8) << 3) |
77 ((sps
->bit_depth
- 8) << 6) |
78 ((sps
->log2_max_poc_lsb
- 4) << 9) |
83 fill_picture_entry(&pp
->CurrPic
, ff_dxva2_get_surface_index(avctx
, ctx
, current_picture
->frame
), 0);
85 pp
->sps_max_dec_pic_buffering_minus1
= sps
->temporal_layer
[sps
->max_sub_layers
- 1].max_dec_pic_buffering
- 1;
86 pp
->log2_min_luma_coding_block_size_minus3
= sps
->log2_min_cb_size
- 3;
87 pp
->log2_diff_max_min_luma_coding_block_size
= sps
->log2_diff_max_min_coding_block_size
;
88 pp
->log2_min_transform_block_size_minus2
= sps
->log2_min_tb_size
- 2;
89 pp
->log2_diff_max_min_transform_block_size
= sps
->log2_max_trafo_size
- sps
->log2_min_tb_size
;
90 pp
->max_transform_hierarchy_depth_inter
= sps
->max_transform_hierarchy_depth_inter
;
91 pp
->max_transform_hierarchy_depth_intra
= sps
->max_transform_hierarchy_depth_intra
;
92 pp
->num_short_term_ref_pic_sets
= sps
->nb_st_rps
;
93 pp
->num_long_term_ref_pics_sps
= sps
->num_long_term_ref_pics_sps
;
95 pp
->num_ref_idx_l0_default_active_minus1
= pps
->num_ref_idx_l0_default_active
- 1;
96 pp
->num_ref_idx_l1_default_active_minus1
= pps
->num_ref_idx_l1_default_active
- 1;
97 pp
->init_qp_minus26
= pps
->pic_init_qp_minus26
;
99 if (h
->sh
.short_term_ref_pic_set_sps_flag
== 0 && h
->sh
.short_term_rps
) {
100 pp
->ucNumDeltaPocsOfRefRpsIdx
= h
->sh
.short_term_rps
->rps_idx_num_delta_pocs
;
101 pp
->wNumBitsForShortTermRPSInSlice
= h
->sh
.short_term_ref_pic_set_size
;
104 pp
->dwCodingParamToolFlags
= (sps
->scaling_list_enable_flag
<< 0) |
105 (sps
->amp_enabled_flag
<< 1) |
106 (sps
->sao_enabled
<< 2) |
107 (sps
->pcm_enabled_flag
<< 3) |
108 ((sps
->pcm_enabled_flag ?
(sps
->pcm
.bit_depth
- 1) : 0) << 4) |
109 ((sps
->pcm_enabled_flag ?
(sps
->pcm
.bit_depth_chroma
- 1) : 0) << 8) |
110 ((sps
->pcm_enabled_flag ?
(sps
->pcm
.log2_min_pcm_cb_size
- 3) : 0) << 12) |
111 ((sps
->pcm_enabled_flag ?
(sps
->pcm
.log2_max_pcm_cb_size
- sps
->pcm
.log2_min_pcm_cb_size
) : 0) << 14) |
112 (sps
->pcm
.loop_filter_disable_flag
<< 16) |
113 (sps
->long_term_ref_pics_present_flag
<< 17) |
114 (sps
->sps_temporal_mvp_enabled_flag
<< 18) |
115 (sps
->sps_strong_intra_smoothing_enable_flag
<< 19) |
116 (pps
->dependent_slice_segments_enabled_flag
<< 20) |
117 (pps
->output_flag_present_flag
<< 21) |
118 (pps
->num_extra_slice_header_bits
<< 22) |
119 (pps
->sign_data_hiding_flag
<< 25) |
120 (pps
->cabac_init_present_flag
<< 26) |
123 pp
->dwCodingSettingPicturePropertyFlags
= (pps
->constrained_intra_pred_flag
<< 0) |
124 (pps
->transform_skip_enabled_flag
<< 1) |
125 (pps
->cu_qp_delta_enabled_flag
<< 2) |
126 (pps
->pic_slice_level_chroma_qp_offsets_present_flag
<< 3) |
127 (pps
->weighted_pred_flag
<< 4) |
128 (pps
->weighted_bipred_flag
<< 5) |
129 (pps
->transquant_bypass_enable_flag
<< 6) |
130 (pps
->tiles_enabled_flag
<< 7) |
131 (pps
->entropy_coding_sync_enabled_flag
<< 8) |
132 (pps
->uniform_spacing_flag
<< 9) |
133 ((pps
->tiles_enabled_flag ? pps
->loop_filter_across_tiles_enabled_flag
: 0) << 10) |
134 (pps
->seq_loop_filter_across_slices_enabled_flag
<< 11) |
135 (pps
->deblocking_filter_override_enabled_flag
<< 12) |
136 (pps
->disable_dbf
<< 13) |
137 (pps
->lists_modification_present_flag
<< 14) |
138 (pps
->slice_header_extension_present_flag
<< 15) |
144 pp
->pps_cb_qp_offset
= pps
->cb_qp_offset
;
145 pp
->pps_cr_qp_offset
= pps
->cr_qp_offset
;
146 if (pps
->tiles_enabled_flag
) {
147 pp
->num_tile_columns_minus1
= pps
->num_tile_columns
- 1;
148 pp
->num_tile_rows_minus1
= pps
->num_tile_rows
- 1;
150 if (!pps
->uniform_spacing_flag
) {
151 for (i
= 0; i
< pps
->num_tile_columns
; i
++)
152 pp
->column_width_minus1
[i
] = pps
->column_width
[i
] - 1;
154 for (i
= 0; i
< pps
->num_tile_rows
; i
++)
155 pp
->row_height_minus1
[i
] = pps
->row_height
[i
] - 1;
159 pp
->diff_cu_qp_delta_depth
= pps
->diff_cu_qp_delta_depth
;
160 pp
->pps_beta_offset_div2
= pps
->beta_offset
/ 2;
161 pp
->pps_tc_offset_div2
= pps
->tc_offset
/ 2;
162 pp
->log2_parallel_merge_level_minus2
= pps
->log2_parallel_merge_level
- 2;
163 pp
->CurrPicOrderCntVal
= h
->poc
;
165 // fill RefPicList from the DPB
166 for (i
= 0, j
= 0; i
< FF_ARRAY_ELEMS(pp
->RefPicList
); i
++) {
167 const HEVCFrame
*frame
= NULL
;
168 while (!frame
&& j
< FF_ARRAY_ELEMS(h
->DPB
)) {
169 if (&h
->DPB
[j
] != current_picture
&& (h
->DPB
[j
].flags
& (HEVC_FRAME_FLAG_LONG_REF
| HEVC_FRAME_FLAG_SHORT_REF
)))
175 fill_picture_entry(&pp
->RefPicList
[i
], ff_dxva2_get_surface_index(avctx
, ctx
, frame
->frame
), !!(frame
->flags
& HEVC_FRAME_FLAG_LONG_REF
));
176 pp
->PicOrderCntValList
[i
] = frame
->poc
;
178 pp
->RefPicList
[i
].bPicEntry
= 0xff;
179 pp
->PicOrderCntValList
[i
] = 0;
183 #define DO_REF_LIST(ref_idx, ref_list) { \
184 const RefPicList *rpl = &h->rps[ref_idx]; \
185 for (i = 0, j = 0; i < FF_ARRAY_ELEMS(pp->ref_list); i++) { \
186 const HEVCFrame *frame = NULL; \
187 while (!frame && j < rpl->nb_refs) \
188 frame = rpl->ref[j++]; \
190 pp->ref_list[i] = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, frame->frame)); \
192 pp->ref_list[i] = 0xff; \
196 // Fill short term and long term lists
197 DO_REF_LIST(ST_CURR_BEF
, RefPicSetStCurrBefore
);
198 DO_REF_LIST(ST_CURR_AFT
, RefPicSetStCurrAfter
);
199 DO_REF_LIST(LT_CURR
, RefPicSetLtCurr
);
201 pp
->StatusReportFeedbackNumber
= 1 + DXVA_CONTEXT_REPORT_ID(avctx
, ctx
)++;
204 static void fill_scaling_lists(AVDXVAContext
*ctx
, const HEVCContext
*h
, DXVA_Qmatrix_HEVC
*qm
)
207 const ScalingList
*sl
= h
->ps
.pps
->scaling_list_data_present_flag ?
208 &h
->ps
.pps
->scaling_list
: &h
->ps
.sps
->scaling_list
;
210 memset(qm
, 0, sizeof(*qm
));
211 for (i
= 0; i
< 6; i
++) {
212 for (j
= 0; j
< 16; j
++) {
213 pos
= 4 * ff_hevc_diag_scan4x4_y
[j
] + ff_hevc_diag_scan4x4_x
[j
];
214 qm
->ucScalingLists0
[i
][j
] = sl
->sl
[0][i
][pos
];
217 for (j
= 0; j
< 64; j
++) {
218 pos
= 8 * ff_hevc_diag_scan8x8_y
[j
] + ff_hevc_diag_scan8x8_x
[j
];
219 qm
->ucScalingLists1
[i
][j
] = sl
->sl
[1][i
][pos
];
220 qm
->ucScalingLists2
[i
][j
] = sl
->sl
[2][i
][pos
];
223 qm
->ucScalingLists3
[i
][j
] = sl
->sl
[3][i
* 3][pos
];
226 qm
->ucScalingListDCCoefSizeID2
[i
] = sl
->sl_dc
[0][i
];
228 qm
->ucScalingListDCCoefSizeID3
[i
] = sl
->sl_dc
[1][i
* 3];
232 static void fill_slice_short(DXVA_Slice_HEVC_Short
*slice
,
233 unsigned position
, unsigned size
)
235 memset(slice
, 0, sizeof(*slice
));
236 slice
->BSNALunitDataLocation
= position
;
237 slice
->SliceBytesInBuffer
= size
;
238 slice
->wBadSliceChopping
= 0;
241 static int commit_bitstream_and_slice_buffer(AVCodecContext
*avctx
,
242 DECODER_BUFFER_DESC
*bs
,
243 DECODER_BUFFER_DESC
*sc
)
245 const HEVCContext
*h
= avctx
->priv_data
;
246 AVDXVAContext
*ctx
= DXVA_CONTEXT(avctx
);
247 const HEVCFrame
*current_picture
= h
->ref
;
248 struct hevc_dxva2_picture_context
*ctx_pic
= current_picture
->hwaccel_picture_private
;
249 DXVA_Slice_HEVC_Short
*slice
= NULL
;
251 uint8_t *dxva_data
, *current
, *end
;
259 /* Create an annex B bitstream buffer with only slice NAL and finalize slice */
261 if (ff_dxva2_is_d3d11(avctx
)) {
262 type
= D3D11_VIDEO_DECODER_BUFFER_BITSTREAM
;
263 if (FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx
)->video_context
,
264 D3D11VA_CONTEXT(ctx
)->decoder
,
266 &dxva_size
, &dxva_data_ptr
)))
271 if (avctx
->pix_fmt
== AV_PIX_FMT_DXVA2_VLD
) {
272 type
= DXVA2_BitStreamDateBufferType
;
273 if (FAILED(IDirectXVideoDecoder_GetBuffer(DXVA2_CONTEXT(ctx
)->decoder
,
275 &dxva_data_ptr
, &dxva_size
)))
280 dxva_data
= dxva_data_ptr
;
282 end
= dxva_data
+ dxva_size
;
284 for (i
= 0; i
< ctx_pic
->slice_count
; i
++) {
285 static const uint8_t start_code
[] = { 0, 0, 1 };
286 static const unsigned start_code_size
= sizeof(start_code
);
287 unsigned position
, size
;
289 slice
= &ctx_pic
->slice_short
[i
];
291 position
= slice
->BSNALunitDataLocation
;
292 size
= slice
->SliceBytesInBuffer
;
293 if (start_code_size
+ size
> end
- current
) {
294 av_log(avctx
, AV_LOG_ERROR
, "Failed to build bitstream");
298 slice
->BSNALunitDataLocation
= current
- dxva_data
;
299 slice
->SliceBytesInBuffer
= start_code_size
+ size
;
301 memcpy(current
, start_code
, start_code_size
);
302 current
+= start_code_size
;
304 memcpy(current
, &ctx_pic
->bitstream
[position
], size
);
307 padding
= FFMIN(128 - ((current
- dxva_data
) & 127), end
- current
);
308 if (slice
&& padding
> 0) {
309 memset(current
, 0, padding
);
312 slice
->SliceBytesInBuffer
+= padding
;
315 if (ff_dxva2_is_d3d11(avctx
))
316 if (FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx
)->video_context
, D3D11VA_CONTEXT(ctx
)->decoder
, type
)))
320 if (avctx
->pix_fmt
== AV_PIX_FMT_DXVA2_VLD
)
321 if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(DXVA2_CONTEXT(ctx
)->decoder
, type
)))
324 if (i
< ctx_pic
->slice_count
)
328 if (ff_dxva2_is_d3d11(avctx
)) {
329 D3D11_VIDEO_DECODER_BUFFER_DESC
*dsc11
= bs
;
330 memset(dsc11
, 0, sizeof(*dsc11
));
331 dsc11
->BufferType
= type
;
332 dsc11
->DataSize
= current
- dxva_data
;
333 dsc11
->NumMBsInBuffer
= 0;
335 type
= D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL
;
339 if (avctx
->pix_fmt
== AV_PIX_FMT_DXVA2_VLD
) {
340 DXVA2_DecodeBufferDesc
*dsc2
= bs
;
341 memset(dsc2
, 0, sizeof(*dsc2
));
342 dsc2
->CompressedBufferType
= type
;
343 dsc2
->DataSize
= current
- dxva_data
;
344 dsc2
->NumMBsInBuffer
= 0;
346 type
= DXVA2_SliceControlBufferType
;
350 slice_data
= ctx_pic
->slice_short
;
351 slice_size
= ctx_pic
->slice_count
* sizeof(*ctx_pic
->slice_short
);
353 av_assert0(((current
- dxva_data
) & 127) == 0);
354 return ff_dxva2_commit_buffer(avctx
, ctx
, sc
,
356 slice_data
, slice_size
, 0);
360 static int dxva2_hevc_start_frame(AVCodecContext
*avctx
,
361 av_unused
const uint8_t *buffer
,
362 av_unused
uint32_t size
)
364 const HEVCContext
*h
= avctx
->priv_data
;
365 AVDXVAContext
*ctx
= DXVA_CONTEXT(avctx
);
366 struct hevc_dxva2_picture_context
*ctx_pic
= h
->ref
->hwaccel_picture_private
;
368 if (!DXVA_CONTEXT_VALID(avctx
, ctx
))
372 /* Fill up DXVA_PicParams_HEVC */
373 fill_picture_parameters(avctx
, ctx
, h
, &ctx_pic
->pp
);
375 /* Fill up DXVA_Qmatrix_HEVC */
376 fill_scaling_lists(ctx
, h
, &ctx_pic
->qm
);
378 ctx_pic
->slice_count
= 0;
379 ctx_pic
->bitstream_size
= 0;
380 ctx_pic
->bitstream
= NULL
;
384 static int dxva2_hevc_decode_slice(AVCodecContext
*avctx
,
385 const uint8_t *buffer
,
388 const HEVCContext
*h
= avctx
->priv_data
;
389 const HEVCFrame
*current_picture
= h
->ref
;
390 struct hevc_dxva2_picture_context
*ctx_pic
= current_picture
->hwaccel_picture_private
;
393 if (ctx_pic
->slice_count
>= MAX_SLICES
)
396 if (!ctx_pic
->bitstream
)
397 ctx_pic
->bitstream
= buffer
;
398 ctx_pic
->bitstream_size
+= size
;
400 position
= buffer
- ctx_pic
->bitstream
;
401 fill_slice_short(&ctx_pic
->slice_short
[ctx_pic
->slice_count
], position
, size
);
402 ctx_pic
->slice_count
++;
407 static int dxva2_hevc_end_frame(AVCodecContext
*avctx
)
409 HEVCContext
*h
= avctx
->priv_data
;
410 struct hevc_dxva2_picture_context
*ctx_pic
= h
->ref
->hwaccel_picture_private
;
411 int scale
= ctx_pic
->pp
.dwCodingParamToolFlags
& 1;
414 if (ctx_pic
->slice_count
<= 0 || ctx_pic
->bitstream_size
<= 0)
417 ret
= ff_dxva2_common_end_frame(avctx
, h
->ref
->frame
,
418 &ctx_pic
->pp
, sizeof(ctx_pic
->pp
),
419 scale ?
&ctx_pic
->qm
: NULL
, scale ?
sizeof(ctx_pic
->qm
) : 0,
420 commit_bitstream_and_slice_buffer
);
#if CONFIG_HEVC_DXVA2_HWACCEL
/* HEVC hwaccel descriptor for the AV_PIX_FMT_DXVA2_VLD pixel format. */
AVHWAccel ff_hevc_dxva2_hwaccel = {
    .name           = "hevc_dxva2",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_HEVC,
    .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_hevc_start_frame,
    .decode_slice   = dxva2_hevc_decode_slice,
    .end_frame      = dxva2_hevc_end_frame,
    .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif

#if CONFIG_HEVC_D3D11VA_HWACCEL
/* HEVC hwaccel descriptor for the AV_PIX_FMT_D3D11VA_VLD pixel format. */
AVHWAccel ff_hevc_d3d11va_hwaccel = {
    .name           = "hevc_d3d11va",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_HEVC,
    .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_hevc_start_frame,
    .decode_slice   = dxva2_hevc_decode_slice,
    .end_frame      = dxva2_hevc_end_frame,
    .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif

#if CONFIG_HEVC_D3D11VA2_HWACCEL
/* HEVC hwaccel descriptor for the AV_PIX_FMT_D3D11 pixel format.
 * NOTE(review): the initializer is closed after priv_data_size, as for the
 * other descriptors in this file; confirm no further fields follow. */
AVHWAccel ff_hevc_d3d11va2_hwaccel = {
    .name           = "hevc_d3d11va2",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_HEVC,
    .pix_fmt        = AV_PIX_FMT_D3D11,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_hevc_start_frame,
    .decode_slice   = dxva2_hevc_decode_slice,
    .end_frame      = dxva2_hevc_end_frame,
    .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif