3 * Copyright (C) 2015 Luca Barbato
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <nvEncodeAPI.h>
28 #define CUDA_LIBNAME "libcuda.so"
33 #define NVENC_LIBNAME "libnvidia-encode.so"
39 #define NVENC_LIBNAME "nvEncodeAPI64.dll"
41 #define NVENC_LIBNAME "nvEncodeAPI.dll"
44 #define dlopen(filename, flags) LoadLibrary((filename))
45 #define dlsym(handle, symbol) GetProcAddress(handle, symbol)
46 #define dlclose(handle) FreeLibrary(handle)
49 #include "libavutil/common.h"
50 #include "libavutil/imgutils.h"
51 #include "libavutil/mem.h"
/* Threshold compared against ((major << 4) | minor) of a CUDA device. */
#define NVENC_CAP 0x30
/* Size requested for each output bitstream buffer (1 MiB).
 * Parenthesized so the macro stays a single operand inside any expression. */
#define BITSTREAM_BUFFER_SIZE (1024 * 1024)
59 #define LOAD_LIBRARY(l, path) \
61 if (!((l) = dlopen(path, RTLD_LAZY))) { \
62 av_log(avctx, AV_LOG_ERROR, \
65 return AVERROR_UNKNOWN; \
69 #define LOAD_SYMBOL(fun, lib, symbol) \
71 if (!((fun) = dlsym(lib, symbol))) { \
72 av_log(avctx, AV_LOG_ERROR, \
75 return AVERROR_UNKNOWN; \
79 static av_cold
int nvenc_load_libraries(AVCodecContext
*avctx
)
81 NVENCContext
*ctx
= avctx
->priv_data
;
82 NVENCLibraryContext
*nvel
= &ctx
->nvel
;
83 PNVENCODEAPICREATEINSTANCE nvenc_create_instance
;
85 LOAD_LIBRARY(nvel
->cuda
, CUDA_LIBNAME
);
87 LOAD_SYMBOL(nvel
->cu_init
, nvel
->cuda
, "cuInit");
88 LOAD_SYMBOL(nvel
->cu_device_get_count
, nvel
->cuda
, "cuDeviceGetCount");
89 LOAD_SYMBOL(nvel
->cu_device_get
, nvel
->cuda
, "cuDeviceGet");
90 LOAD_SYMBOL(nvel
->cu_device_get_name
, nvel
->cuda
, "cuDeviceGetName");
91 LOAD_SYMBOL(nvel
->cu_device_compute_capability
, nvel
->cuda
,
92 "cuDeviceComputeCapability");
93 LOAD_SYMBOL(nvel
->cu_ctx_create
, nvel
->cuda
, "cuCtxCreate_v2");
94 LOAD_SYMBOL(nvel
->cu_ctx_pop_current
, nvel
->cuda
, "cuCtxPopCurrent_v2");
95 LOAD_SYMBOL(nvel
->cu_ctx_destroy
, nvel
->cuda
, "cuCtxDestroy_v2");
97 LOAD_LIBRARY(nvel
->nvenc
, NVENC_LIBNAME
);
99 LOAD_SYMBOL(nvenc_create_instance
, nvel
->nvenc
,
100 "NvEncodeAPICreateInstance");
102 nvel
->nvenc_funcs
.version
= NV_ENCODE_API_FUNCTION_LIST_VER
;
104 if ((nvenc_create_instance(&nvel
->nvenc_funcs
)) != NV_ENC_SUCCESS
) {
105 av_log(avctx
, AV_LOG_ERROR
, "Cannot create the NVENC instance");
106 return AVERROR_UNKNOWN
;
112 static int nvenc_open_session(AVCodecContext
*avctx
)
114 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params
= { 0 };
115 NVENCContext
*ctx
= avctx
->priv_data
;
116 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
119 params
.version
= NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER
;
120 params
.apiVersion
= NVENCAPI_VERSION
;
121 params
.device
= ctx
->cu_context
;
122 params
.deviceType
= NV_ENC_DEVICE_TYPE_CUDA
;
124 ret
= nv
->nvEncOpenEncodeSessionEx(¶ms
, &ctx
->nvenc_ctx
);
125 if (ret
!= NV_ENC_SUCCESS
) {
126 ctx
->nvenc_ctx
= NULL
;
127 av_log(avctx
, AV_LOG_ERROR
,
128 "Cannot open the NVENC Session\n");
129 return AVERROR_UNKNOWN
;
135 static int nvenc_check_codec_support(AVCodecContext
*avctx
)
137 NVENCContext
*ctx
= avctx
->priv_data
;
138 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
139 int i
, ret
, count
= 0;
142 ret
= nv
->nvEncGetEncodeGUIDCount(ctx
->nvenc_ctx
, &count
);
144 if (ret
!= NV_ENC_SUCCESS
|| !count
)
145 return AVERROR(ENOSYS
);
147 guids
= av_malloc(count
* sizeof(GUID
));
149 return AVERROR(ENOMEM
);
151 ret
= nv
->nvEncGetEncodeGUIDs(ctx
->nvenc_ctx
, guids
, count
, &count
);
152 if (ret
!= NV_ENC_SUCCESS
) {
153 ret
= AVERROR(ENOSYS
);
157 ret
= AVERROR(ENOSYS
);
158 for (i
= 0; i
< count
; i
++) {
159 if (!memcmp(&guids
[i
], &ctx
->params
.encodeGUID
, sizeof(*guids
))) {
/*
 * Query a single NVENC capability for the codec GUID held in
 * ctx->params.encodeGUID.
 *
 * avctx: codec context; its priv_data is used as the NVENCContext.
 * cap:   capability identifier, passed to the driver via params.capsToQuery.
 *
 * NOTE(review): the declarations of ret/val and the return statements are
 * not visible in this extract. Callers in this file compare the result
 * numerically (e.g. against avctx->width) -- confirm the value returned
 * when the query does not succeed.
 */
171 static int nvenc_check_cap(AVCodecContext
*avctx
, NV_ENC_CAPS cap
)
173 NVENCContext
*ctx
= avctx
->priv_data
;
174 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
175 NV_ENC_CAPS_PARAM params
= { 0 };
/* The parameter struct carries its own version tag plus the capability id. */
178 params
.version
= NV_ENC_CAPS_PARAM_VER
;
179 params
.capsToQuery
= cap
;
/* The queried value is written to val by the driver call. */
181 ret
= nv
->nvEncGetEncodeCaps(ctx
->nvenc_ctx
, ctx
->params
.encodeGUID
, ¶ms
, &val
);
183 if (ret
== NV_ENC_SUCCESS
)
188 static int nvenc_check_capabilities(AVCodecContext
*avctx
)
192 ret
= nvenc_check_codec_support(avctx
);
194 av_log(avctx
, AV_LOG_VERBOSE
, "Codec not supported\n");
198 ret
= nvenc_check_cap(avctx
, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE
);
199 if (avctx
->pix_fmt
== AV_PIX_FMT_YUV444P
&& ret
<= 0) {
200 av_log(avctx
, AV_LOG_VERBOSE
, "YUV444P not supported\n");
201 return AVERROR(ENOSYS
);
204 ret
= nvenc_check_cap(avctx
, NV_ENC_CAPS_WIDTH_MAX
);
205 if (ret
< avctx
->width
) {
206 av_log(avctx
, AV_LOG_VERBOSE
, "Width %d exceeds %d\n",
208 return AVERROR(ENOSYS
);
211 ret
= nvenc_check_cap(avctx
, NV_ENC_CAPS_HEIGHT_MAX
);
212 if (ret
< avctx
->height
) {
213 av_log(avctx
, AV_LOG_VERBOSE
, "Height %d exceeds %d\n",
215 return AVERROR(ENOSYS
);
218 ret
= nvenc_check_cap(avctx
, NV_ENC_CAPS_NUM_MAX_BFRAMES
);
219 if (ret
< avctx
->max_b_frames
) {
220 av_log(avctx
, AV_LOG_VERBOSE
, "Max b-frames %d exceed %d\n",
221 avctx
->max_b_frames
, ret
);
223 return AVERROR(ENOSYS
);
229 static int nvenc_check_device(AVCodecContext
*avctx
, int idx
)
231 NVENCContext
*ctx
= avctx
->priv_data
;
232 NVENCLibraryContext
*nvel
= &ctx
->nvel
;
233 char name
[128] = { 0 };
234 int major
, minor
, ret
;
237 int loglevel
= AV_LOG_VERBOSE
;
239 if (ctx
->device
== LIST_DEVICES
)
240 loglevel
= AV_LOG_INFO
;
242 ret
= nvel
->cu_device_get(&cu_device
, idx
);
243 if (ret
!= CUDA_SUCCESS
) {
244 av_log(avctx
, AV_LOG_ERROR
,
245 "Cannot access the CUDA device %d\n",
250 ret
= nvel
->cu_device_get_name(name
, sizeof(name
), cu_device
);
251 if (ret
!= CUDA_SUCCESS
)
254 ret
= nvel
->cu_device_compute_capability(&major
, &minor
, cu_device
);
255 if (ret
!= CUDA_SUCCESS
)
258 av_log(avctx
, loglevel
, "Device %d [%s] ", cu_device
, name
);
260 if (((major
<< 4) | minor
) < NVENC_CAP
)
263 ret
= nvel
->cu_ctx_create(&ctx
->cu_context
, 0, cu_device
);
264 if (ret
!= CUDA_SUCCESS
)
267 ret
= nvel
->cu_ctx_pop_current(&dummy
);
268 if (ret
!= CUDA_SUCCESS
)
271 if ((ret
= nvenc_open_session(avctx
)) < 0)
274 if ((ret
= nvenc_check_capabilities(avctx
)) < 0)
277 av_log(avctx
, loglevel
, "supports NVENC\n");
279 if (ctx
->device
== cu_device
|| ctx
->device
== ANY_DEVICE
)
283 nvel
->nvenc_funcs
.nvEncDestroyEncoder(ctx
->nvenc_ctx
);
284 ctx
->nvenc_ctx
= NULL
;
287 nvel
->cu_ctx_destroy(ctx
->cu_context
);
288 ctx
->cu_context
= NULL
;
292 av_log(avctx
, loglevel
, "does not support NVENC (major %d minor %d)\n",
295 return AVERROR(ENOSYS
);
298 static int nvenc_setup_device(AVCodecContext
*avctx
)
300 NVENCContext
*ctx
= avctx
->priv_data
;
301 NVENCLibraryContext
*nvel
= &ctx
->nvel
;
302 int i
, nb_devices
= 0;
304 if ((nvel
->cu_init(0)) != CUDA_SUCCESS
) {
305 av_log(avctx
, AV_LOG_ERROR
,
306 "Cannot init CUDA\n");
307 return AVERROR_UNKNOWN
;
310 if ((nvel
->cu_device_get_count(&nb_devices
)) != CUDA_SUCCESS
) {
311 av_log(avctx
, AV_LOG_ERROR
,
312 "Cannot enumerate the CUDA devices\n");
313 return AVERROR_UNKNOWN
;
316 switch (avctx
->codec
->id
) {
317 case AV_CODEC_ID_H264
:
318 ctx
->params
.encodeGUID
= NV_ENC_CODEC_H264_GUID
;
320 case AV_CODEC_ID_HEVC
:
321 ctx
->params
.encodeGUID
= NV_ENC_CODEC_HEVC_GUID
;
327 for (i
= 0; i
< nb_devices
; ++i
) {
328 if ((nvenc_check_device(avctx
, i
)) >= 0 && ctx
->device
!= LIST_DEVICES
)
332 if (ctx
->device
== LIST_DEVICES
)
335 return AVERROR(ENOSYS
);
338 typedef struct GUIDTuple
{
343 static int nvec_map_preset(NVENCContext
*ctx
)
345 GUIDTuple presets
[] = {
346 { NV_ENC_PRESET_DEFAULT_GUID
},
347 { NV_ENC_PRESET_HP_GUID
},
348 { NV_ENC_PRESET_HQ_GUID
},
349 { NV_ENC_PRESET_BD_GUID
},
350 { NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID
, NVENC_LOWLATENCY
},
351 { NV_ENC_PRESET_LOW_LATENCY_HP_GUID
, NVENC_LOWLATENCY
},
352 { NV_ENC_PRESET_LOW_LATENCY_HQ_GUID
, NVENC_LOWLATENCY
},
353 { NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID
, NVENC_LOSSLESS
},
354 { NV_ENC_PRESET_LOSSLESS_HP_GUID
, NVENC_LOSSLESS
},
358 GUIDTuple
*t
= &presets
[ctx
->preset
];
360 ctx
->params
.presetGUID
= t
->guid
;
361 ctx
->flags
= t
->flags
;
363 return AVERROR(EINVAL
);
366 static void set_constqp(AVCodecContext
*avctx
, NV_ENC_RC_PARAMS
*rc
)
368 rc
->rateControlMode
= NV_ENC_PARAMS_RC_CONSTQP
;
369 rc
->constQP
.qpInterB
= avctx
->global_quality
;
370 rc
->constQP
.qpInterP
= avctx
->global_quality
;
371 rc
->constQP
.qpIntra
= avctx
->global_quality
;
/*
 * Copy the quantizer limits from the codec context into the rate-control
 * parameters.
 *
 * avctx: codec context providing qmin and qmax.
 * rc:    rate-control parameters whose minQP / maxQP members are written.
 *
 * A negative qmin (or qmax) leaves the corresponding limit untouched.
 * NOTE(review): one line at the start of each branch is missing from this
 * extract -- check the full source before relying on this description.
 */
374 static void set_vbr(AVCodecContext
*avctx
, NV_ENC_RC_PARAMS
*rc
)
/* Lower bound: the same qmin is applied to B, P and intra pictures. */
376 if (avctx
->qmin
>= 0) {
378 rc
->minQP
.qpInterB
= avctx
->qmin
;
379 rc
->minQP
.qpInterP
= avctx
->qmin
;
380 rc
->minQP
.qpIntra
= avctx
->qmin
;
/* Upper bound: the same qmax is applied to B, P and intra pictures. */
383 if (avctx
->qmax
>= 0) {
385 rc
->maxQP
.qpInterB
= avctx
->qmax
;
386 rc
->maxQP
.qpInterP
= avctx
->qmax
;
387 rc
->maxQP
.qpIntra
= avctx
->qmax
;
391 static void nvenc_override_rate_control(AVCodecContext
*avctx
,
392 NV_ENC_RC_PARAMS
*rc
)
394 NVENCContext
*ctx
= avctx
->priv_data
;
397 case NV_ENC_PARAMS_RC_CONSTQP
:
398 if (avctx
->global_quality
< 0) {
399 av_log(avctx
, AV_LOG_WARNING
,
400 "The constant quality rate-control requires "
401 "the 'global_quality' option set.\n");
404 set_constqp(avctx
, rc
);
406 case NV_ENC_PARAMS_RC_2_PASS_VBR
:
407 case NV_ENC_PARAMS_RC_VBR
:
408 if (avctx
->qmin
< 0 && avctx
->qmax
< 0) {
409 av_log(avctx
, AV_LOG_WARNING
,
410 "The variable bitrate rate-control requires "
411 "the 'qmin' and/or 'qmax' option set.\n");
414 case NV_ENC_PARAMS_RC_VBR_MINQP
:
415 if (avctx
->qmin
< 0) {
416 av_log(avctx
, AV_LOG_WARNING
,
417 "The variable bitrate rate-control requires "
418 "the 'qmin' option set.\n");
423 case NV_ENC_PARAMS_RC_CBR
:
425 case NV_ENC_PARAMS_RC_2_PASS_QUALITY
:
426 case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP
:
427 if (!(ctx
->flags
& NVENC_LOWLATENCY
)) {
428 av_log(avctx
, AV_LOG_WARNING
,
429 "The multipass rate-control requires "
430 "a low-latency preset.\n");
435 rc
->rateControlMode
= ctx
->rc
;
438 static void nvenc_setup_rate_control(AVCodecContext
*avctx
)
440 NVENCContext
*ctx
= avctx
->priv_data
;
441 NV_ENC_RC_PARAMS
*rc
= &ctx
->config
.rcParams
;
443 if (avctx
->bit_rate
> 0)
444 rc
->averageBitRate
= avctx
->bit_rate
;
446 if (avctx
->rc_max_rate
> 0)
447 rc
->maxBitRate
= avctx
->rc_max_rate
;
450 nvenc_override_rate_control(avctx
, rc
);
451 } else if (avctx
->global_quality
> 0) {
452 set_constqp(avctx
, rc
);
453 } else if (avctx
->qmin
>= 0 && avctx
->qmax
>= 0) {
454 rc
->rateControlMode
= NV_ENC_PARAMS_RC_VBR
;
458 if (avctx
->rc_buffer_size
> 0)
459 rc
->vbvBufferSize
= avctx
->rc_buffer_size
;
461 if (rc
->averageBitRate
> 0)
462 avctx
->bit_rate
= rc
->averageBitRate
;
465 static int nvenc_setup_h264_config(AVCodecContext
*avctx
)
467 NVENCContext
*ctx
= avctx
->priv_data
;
468 NV_ENC_CONFIG
*cc
= &ctx
->config
;
469 NV_ENC_CONFIG_H264
*h264
= &cc
->encodeCodecConfig
.h264Config
;
470 NV_ENC_CONFIG_H264_VUI_PARAMETERS
*vui
= &h264
->h264VUIParameters
;
472 vui
->colourDescriptionPresentFlag
= 1;
473 vui
->videoSignalTypePresentFlag
= 1;
475 vui
->colourMatrix
= avctx
->colorspace
;
476 vui
->colourPrimaries
= avctx
->color_primaries
;
477 vui
->transferCharacteristics
= avctx
->color_trc
;
479 vui
->videoFullRangeFlag
= avctx
->color_range
== AVCOL_RANGE_JPEG
;
481 h264
->disableSPSPPS
= (avctx
->flags
& CODEC_FLAG_GLOBAL_HEADER
) ?
1 : 0;
482 h264
->repeatSPSPPS
= (avctx
->flags
& CODEC_FLAG_GLOBAL_HEADER
) ?
0 : 1;
484 h264
->maxNumRefFrames
= avctx
->refs
;
485 h264
->idrPeriod
= cc
->gopLength
;
488 avctx
->profile
= ctx
->profile
;
490 if (avctx
->pix_fmt
== AV_PIX_FMT_YUV444P
)
491 h264
->chromaFormatIDC
= 3;
493 h264
->chromaFormatIDC
= 1;
495 switch (ctx
->profile
) {
496 case NV_ENC_H264_PROFILE_BASELINE
:
497 cc
->profileGUID
= NV_ENC_H264_PROFILE_BASELINE_GUID
;
499 case NV_ENC_H264_PROFILE_MAIN
:
500 cc
->profileGUID
= NV_ENC_H264_PROFILE_MAIN_GUID
;
502 case NV_ENC_H264_PROFILE_HIGH
:
503 cc
->profileGUID
= NV_ENC_H264_PROFILE_HIGH_GUID
;
505 case NV_ENC_H264_PROFILE_HIGH_444
:
506 cc
->profileGUID
= NV_ENC_H264_PROFILE_HIGH_444_GUID
;
508 case NV_ENC_H264_PROFILE_CONSTRAINED_HIGH
:
509 cc
->profileGUID
= NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID
;
513 h264
->level
= ctx
->level
;
518 static int nvenc_setup_hevc_config(AVCodecContext
*avctx
)
520 NVENCContext
*ctx
= avctx
->priv_data
;
521 NV_ENC_CONFIG
*cc
= &ctx
->config
;
522 NV_ENC_CONFIG_HEVC
*hevc
= &cc
->encodeCodecConfig
.hevcConfig
;
524 hevc
->disableSPSPPS
= (avctx
->flags
& CODEC_FLAG_GLOBAL_HEADER
) ?
1 : 0;
525 hevc
->repeatSPSPPS
= (avctx
->flags
& CODEC_FLAG_GLOBAL_HEADER
) ?
0 : 1;
527 hevc
->maxNumRefFramesInDPB
= avctx
->refs
;
528 hevc
->idrPeriod
= cc
->gopLength
;
530 /* No other profile is supported in the current SDK version 5 */
531 cc
->profileGUID
= NV_ENC_HEVC_PROFILE_MAIN_GUID
;
532 avctx
->profile
= FF_PROFILE_HEVC_MAIN
;
535 hevc
->level
= ctx
->level
;
537 hevc
->level
= NV_ENC_LEVEL_AUTOSELECT
;
541 hevc
->tier
= ctx
->tier
;
/*
 * Dispatch to the codec-specific configuration routine selected by
 * avctx->codec->id and return that routine's result.
 *
 * avctx: codec context forwarded unchanged to the H.264 / HEVC helper.
 *
 * NOTE(review): the statement reached for any other codec id is not
 * visible in this extract.
 */
546 static int nvenc_setup_codec_config(AVCodecContext
*avctx
)
548 switch (avctx
->codec
->id
) {
549 case AV_CODEC_ID_H264
:
550 return nvenc_setup_h264_config(avctx
);
551 case AV_CODEC_ID_HEVC
:
552 return nvenc_setup_hevc_config(avctx
);
557 static int nvenc_setup_encoder(AVCodecContext
*avctx
)
559 NVENCContext
*ctx
= avctx
->priv_data
;
560 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
561 NV_ENC_PRESET_CONFIG preset_cfg
= { 0 };
564 ctx
->params
.version
= NV_ENC_INITIALIZE_PARAMS_VER
;
566 ctx
->params
.encodeHeight
= avctx
->height
;
567 ctx
->params
.encodeWidth
= avctx
->width
;
569 if (avctx
->sample_aspect_ratio
.num
&&
570 avctx
->sample_aspect_ratio
.den
&&
571 (avctx
->sample_aspect_ratio
.num
!= 1 ||
572 avctx
->sample_aspect_ratio
.den
!= 1)) {
573 av_reduce(&ctx
->params
.darWidth
,
574 &ctx
->params
.darHeight
,
575 avctx
->width
* avctx
->sample_aspect_ratio
.num
,
576 avctx
->height
* avctx
->sample_aspect_ratio
.den
,
579 ctx
->params
.darHeight
= avctx
->height
;
580 ctx
->params
.darWidth
= avctx
->width
;
583 ctx
->params
.frameRateNum
= avctx
->time_base
.den
;
584 ctx
->params
.frameRateDen
= avctx
->time_base
.num
* avctx
->ticks_per_frame
;
586 ctx
->params
.enableEncodeAsync
= 0;
587 ctx
->params
.enablePTD
= 1;
589 ctx
->params
.encodeConfig
= &ctx
->config
;
591 nvec_map_preset(ctx
);
593 preset_cfg
.version
= NV_ENC_PRESET_CONFIG_VER
;
594 preset_cfg
.presetCfg
.version
= NV_ENC_CONFIG_VER
;
596 ret
= nv
->nvEncGetEncodePresetConfig(ctx
->nvenc_ctx
,
597 ctx
->params
.encodeGUID
,
598 ctx
->params
.presetGUID
,
600 if (ret
!= NV_ENC_SUCCESS
) {
601 av_log(avctx
, AV_LOG_ERROR
,
602 "Cannot get the preset configuration\n");
603 return AVERROR_UNKNOWN
;
606 memcpy(&ctx
->config
, &preset_cfg
.presetCfg
, sizeof(ctx
->config
));
608 ctx
->config
.version
= NV_ENC_CONFIG_VER
;
610 if (avctx
->gop_size
> 0) {
611 if (avctx
->max_b_frames
> 0) {
616 * 3 two B frames, and so on. */
617 ctx
->config
.frameIntervalP
= avctx
->max_b_frames
+ 1;
618 } else if (avctx
->max_b_frames
== 0) {
619 ctx
->config
.frameIntervalP
= 1;
621 ctx
->config
.gopLength
= avctx
->gop_size
;
622 } else if (avctx
->gop_size
== 0) {
623 ctx
->config
.frameIntervalP
= 0;
624 ctx
->config
.gopLength
= 1;
627 if (ctx
->config
.frameIntervalP
> 1)
628 avctx
->max_b_frames
= ctx
->config
.frameIntervalP
- 1;
630 nvenc_setup_rate_control(avctx
);
632 if (avctx
->flags
& CODEC_FLAG_INTERLACED_DCT
) {
633 ctx
->config
.frameFieldMode
= NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD
;
635 ctx
->config
.frameFieldMode
= NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME
;
638 if ((ret
= nvenc_setup_codec_config(avctx
)) < 0)
641 ret
= nv
->nvEncInitializeEncoder(ctx
->nvenc_ctx
, &ctx
->params
);
642 if (ret
!= NV_ENC_SUCCESS
) {
643 av_log(avctx
, AV_LOG_ERROR
, "Cannot initialize the decoder");
644 return AVERROR_UNKNOWN
;
650 static int nvenc_alloc_surface(AVCodecContext
*avctx
, int idx
)
652 NVENCContext
*ctx
= avctx
->priv_data
;
653 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
655 NV_ENC_CREATE_INPUT_BUFFER in_buffer
= { 0 };
656 NV_ENC_CREATE_BITSTREAM_BUFFER out_buffer
= { 0 };
658 in_buffer
.version
= NV_ENC_CREATE_INPUT_BUFFER_VER
;
659 out_buffer
.version
= NV_ENC_CREATE_BITSTREAM_BUFFER_VER
;
661 in_buffer
.width
= avctx
->width
;
662 in_buffer
.height
= avctx
->height
;
664 in_buffer
.memoryHeap
= NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED
;
666 switch (avctx
->pix_fmt
) {
667 case AV_PIX_FMT_YUV420P
:
668 in_buffer
.bufferFmt
= NV_ENC_BUFFER_FORMAT_YV12_PL
;
670 case AV_PIX_FMT_NV12
:
671 in_buffer
.bufferFmt
= NV_ENC_BUFFER_FORMAT_NV12_PL
;
673 case AV_PIX_FMT_YUV444P
:
674 in_buffer
.bufferFmt
= NV_ENC_BUFFER_FORMAT_YUV444_PL
;
680 ret
= nv
->nvEncCreateInputBuffer(ctx
->nvenc_ctx
, &in_buffer
);
681 if (ret
!= NV_ENC_SUCCESS
) {
682 av_log(avctx
, AV_LOG_ERROR
, "CreateInputBuffer failed\n");
683 return AVERROR_UNKNOWN
;
686 ctx
->in
[idx
].in
= in_buffer
.inputBuffer
;
687 ctx
->in
[idx
].format
= in_buffer
.bufferFmt
;
689 /* 1MB is large enough to hold most output frames.
690 * NVENC increases this automatically if it's not enough. */
691 out_buffer
.size
= BITSTREAM_BUFFER_SIZE
;
693 out_buffer
.memoryHeap
= NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED
;
695 ret
= nv
->nvEncCreateBitstreamBuffer(ctx
->nvenc_ctx
, &out_buffer
);
696 if (ret
!= NV_ENC_SUCCESS
) {
697 av_log(avctx
, AV_LOG_ERROR
, "CreateBitstreamBuffer failed\n");
698 return AVERROR_UNKNOWN
;
701 ctx
->out
[idx
].out
= out_buffer
.bitstreamBuffer
;
702 ctx
->out
[idx
].busy
= 0;
707 static int nvenc_setup_surfaces(AVCodecContext
*avctx
)
709 NVENCContext
*ctx
= avctx
->priv_data
;
712 ctx
->nb_surfaces
= FFMAX(4 + avctx
->max_b_frames
,
715 ctx
->in
= av_mallocz(ctx
->nb_surfaces
* sizeof(*ctx
->in
));
717 return AVERROR(ENOMEM
);
719 ctx
->out
= av_mallocz(ctx
->nb_surfaces
* sizeof(*ctx
->out
));
721 return AVERROR(ENOMEM
);
723 ctx
->timestamps
= av_fifo_alloc(ctx
->nb_surfaces
* sizeof(int64_t));
724 if (!ctx
->timestamps
)
725 return AVERROR(ENOMEM
);
726 ctx
->pending
= av_fifo_alloc(ctx
->nb_surfaces
* sizeof(ctx
->out
));
728 return AVERROR(ENOMEM
);
729 ctx
->ready
= av_fifo_alloc(ctx
->nb_surfaces
* sizeof(ctx
->out
));
731 return AVERROR(ENOMEM
);
733 for (i
= 0; i
< ctx
->nb_surfaces
; i
++) {
734 if ((ret
= nvenc_alloc_surface(avctx
, i
)) < 0)
741 #define EXTRADATA_SIZE 512
743 static int nvenc_setup_extradata(AVCodecContext
*avctx
)
745 NVENCContext
*ctx
= avctx
->priv_data
;
746 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
747 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload
= { 0 };
750 avctx
->extradata
= av_mallocz(EXTRADATA_SIZE
+ FF_INPUT_BUFFER_PADDING_SIZE
);
751 if (!avctx
->extradata
)
752 return AVERROR(ENOMEM
);
754 payload
.version
= NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER
;
755 payload
.spsppsBuffer
= avctx
->extradata
;
756 payload
.inBufferSize
= EXTRADATA_SIZE
;
757 payload
.outSPSPPSPayloadSize
= &avctx
->extradata_size
;
759 ret
= nv
->nvEncGetSequenceParams(ctx
->nvenc_ctx
, &payload
);
760 if (ret
!= NV_ENC_SUCCESS
) {
761 av_log(avctx
, AV_LOG_ERROR
, "Cannot get the extradata\n");
762 return AVERROR_UNKNOWN
;
768 av_cold
int ff_nvenc_encode_close(AVCodecContext
*avctx
)
770 NVENCContext
*ctx
= avctx
->priv_data
;
771 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
775 for (i
= 0; i
< ctx
->nb_surfaces
; ++i
) {
776 nv
->nvEncDestroyInputBuffer(ctx
->nvenc_ctx
, ctx
->in
[i
].in
);
777 nv
->nvEncDestroyBitstreamBuffer(ctx
->nvenc_ctx
, ctx
->out
[i
].out
);
785 nv
->nvEncDestroyEncoder(ctx
->nvenc_ctx
);
788 ctx
->nvel
.cu_ctx_destroy(ctx
->cu_context
);
791 dlclose(ctx
->nvel
.nvenc
);
794 dlclose(ctx
->nvel
.cuda
);
/*
 * Encoder initialization entry point.
 *
 * Runs the setup stages in order: load the libraries, set up the device,
 * set up the encoder, then set up the surfaces. Extradata is set up only
 * when CODEC_FLAG_GLOBAL_HEADER is set in avctx->flags. Each stage's
 * result is tested for a negative value.
 *
 * NOTE(review): the statements executed on failure and the final return
 * are not visible in this extract.
 */
799 av_cold
int ff_nvenc_encode_init(AVCodecContext
*avctx
)
803 if ((ret
= nvenc_load_libraries(avctx
)) < 0)
806 if ((ret
= nvenc_setup_device(avctx
)) < 0)
809 if ((ret
= nvenc_setup_encoder(avctx
)) < 0)
812 if ((ret
= nvenc_setup_surfaces(avctx
)) < 0)
/* Extradata is only requested when global headers are enabled. */
815 if (avctx
->flags
& CODEC_FLAG_GLOBAL_HEADER
) {
816 if ((ret
= nvenc_setup_extradata(avctx
)) < 0)
/*
 * Scan ctx->in[0 .. nb_surfaces-1] for the first entry whose locked flag
 * is clear and mark it locked.
 *
 * NOTE(review): the return statements are not visible in this extract;
 * presumably the claimed surface is returned -- confirm, along with the
 * value returned when every surface is locked.
 */
823 static NVENCInputSurface
*get_input_surface(NVENCContext
*ctx
)
827 for (i
= 0; i
< ctx
->nb_surfaces
; i
++) {
828 if (!ctx
->in
[i
].locked
) {
/* Claim the surface so later scans skip it. */
829 ctx
->in
[i
].locked
= 1;
/*
 * Scan ctx->out[0 .. nb_surfaces-1] for the first entry whose busy flag
 * is clear.
 *
 * NOTE(review): the return statements are not visible in this extract;
 * presumably the free surface is returned -- confirm, along with the
 * value returned when every surface is busy.
 */
837 static NVENCOutputSurface
*get_output_surface(NVENCContext
*ctx
)
841 for (i
= 0; i
< ctx
->nb_surfaces
; i
++) {
842 if (!ctx
->out
[i
].busy
) {
850 static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER
*in
, const AVFrame
*frame
)
852 uint8_t *buf
= in
->bufferDataPtr
;
853 int off
= frame
->height
* in
->pitch
;
855 switch (frame
->format
) {
856 case AV_PIX_FMT_YUV420P
:
857 av_image_copy_plane(buf
, in
->pitch
,
858 frame
->data
[0], frame
->linesize
[0],
859 frame
->width
, frame
->height
);
862 av_image_copy_plane(buf
, in
->pitch
>> 1,
863 frame
->data
[2], frame
->linesize
[2],
864 frame
->width
>> 1, frame
->height
>> 1);
868 av_image_copy_plane(buf
, in
->pitch
>> 1,
869 frame
->data
[1], frame
->linesize
[1],
870 frame
->width
>> 1, frame
->height
>> 1);
872 case AV_PIX_FMT_NV12
:
873 av_image_copy_plane(buf
, in
->pitch
,
874 frame
->data
[0], frame
->linesize
[0],
875 frame
->width
, frame
->height
);
878 av_image_copy_plane(buf
, in
->pitch
,
879 frame
->data
[1], frame
->linesize
[1],
880 frame
->width
, frame
->height
>> 1);
882 case AV_PIX_FMT_YUV444P
:
883 av_image_copy_plane(buf
, in
->pitch
,
884 frame
->data
[0], frame
->linesize
[0],
885 frame
->width
, frame
->height
);
888 av_image_copy_plane(buf
, in
->pitch
,
889 frame
->data
[1], frame
->linesize
[1],
890 frame
->width
, frame
->height
);
893 av_image_copy_plane(buf
, in
->pitch
,
894 frame
->data
[2], frame
->linesize
[2],
895 frame
->width
, frame
->height
);
904 static int nvenc_enqueue_frame(AVCodecContext
*avctx
, const AVFrame
*frame
,
905 NVENCInputSurface
**in_surf
)
907 NVENCContext
*ctx
= avctx
->priv_data
;
908 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
909 NV_ENC_LOCK_INPUT_BUFFER params
= { 0 };
910 NVENCInputSurface
*in
= get_input_surface(ctx
);
916 params
.version
= NV_ENC_LOCK_INPUT_BUFFER_VER
;
917 params
.inputBuffer
= in
->in
;
920 ret
= nv
->nvEncLockInputBuffer(ctx
->nvenc_ctx
, ¶ms
);
921 if (ret
!= NV_ENC_SUCCESS
) {
922 av_log(avctx
, AV_LOG_ERROR
, "Cannot lock the buffer %p.\n",
924 return AVERROR_UNKNOWN
;
927 ret
= nvenc_copy_frame(¶ms
, frame
);
931 ret
= nv
->nvEncUnlockInputBuffer(ctx
->nvenc_ctx
, in
->in
);
932 if (ret
!= NV_ENC_SUCCESS
) {
933 av_log(avctx
, AV_LOG_ERROR
, "Cannot unlock the buffer %p.\n",
935 return AVERROR_UNKNOWN
;
943 nv
->nvEncUnlockInputBuffer(ctx
->nvenc_ctx
, in
->in
);
948 static void nvenc_codec_specific_pic_params(AVCodecContext
*avctx
,
949 NV_ENC_PIC_PARAMS
*params
)
951 NVENCContext
*ctx
= avctx
->priv_data
;
953 switch (avctx
->codec
->id
) {
954 case AV_CODEC_ID_H264
:
955 params
->codecPicParams
.h264PicParams
.sliceMode
=
956 ctx
->config
.encodeCodecConfig
.h264Config
.sliceMode
;
957 params
->codecPicParams
.h264PicParams
.sliceModeData
=
958 ctx
->config
.encodeCodecConfig
.h264Config
.sliceModeData
;
960 case AV_CODEC_ID_HEVC
:
961 params
->codecPicParams
.hevcPicParams
.sliceMode
=
962 ctx
->config
.encodeCodecConfig
.hevcConfig
.sliceMode
;
963 params
->codecPicParams
.hevcPicParams
.sliceModeData
=
964 ctx
->config
.encodeCodecConfig
.hevcConfig
.sliceModeData
;
969 static inline int nvenc_enqueue_timestamp(AVFifoBuffer
*f
, int64_t pts
)
971 return av_fifo_generic_write(f
, &pts
, sizeof(pts
), NULL
);
974 static inline int nvenc_dequeue_timestamp(AVFifoBuffer
*f
, int64_t *pts
)
976 return av_fifo_generic_read(f
, pts
, sizeof(*pts
), NULL
);
/*
 * Queue an output surface on FIFO f.
 *
 * The FIFO stores the surface pointer itself (sizeof(surf) bytes taken
 * from &surf), not a copy of the surface. Returns the result of
 * av_fifo_generic_write().
 *
 * NOTE(review): one original line between the signature and the return
 * is missing from this extract -- check the full source for additional
 * side effects on surf.
 */
979 static inline int nvenc_enqueue_surface(AVFifoBuffer
*f
,
980 NVENCOutputSurface
*surf
)
983 return av_fifo_generic_write(f
, &surf
, sizeof(surf
), NULL
);
986 static inline int nvenc_dequeue_surface(AVFifoBuffer
*f
,
987 NVENCOutputSurface
**surf
)
989 return av_fifo_generic_read(f
, surf
, sizeof(*surf
), NULL
);
/*
 * Fill the timing fields of an output packet.
 *
 * pts and duration are copied from the locked bitstream parameters
 * (outputTimeStamp / outputDuration); dts is read from the
 * ctx->timestamps FIFO. Returns the result of nvenc_dequeue_timestamp().
 *
 * NOTE(review): the line declaring the pkt parameter is missing from this
 * extract.
 */
992 static int nvenc_set_timestamp(NVENCContext
*ctx
,
993 NV_ENC_LOCK_BITSTREAM
*params
,
996 pkt
->pts
= params
->outputTimeStamp
;
997 pkt
->duration
= params
->outputDuration
;
/* dts comes from the timestamp FIFO, not from the encoder output. */
999 return nvenc_dequeue_timestamp(ctx
->timestamps
, &pkt
->dts
);
1002 static int nvenc_get_frame(AVCodecContext
*avctx
, AVPacket
*pkt
)
1004 NVENCContext
*ctx
= avctx
->priv_data
;
1005 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
1006 NV_ENC_LOCK_BITSTREAM params
= { 0 };
1007 NVENCOutputSurface
*out
= NULL
;
1010 ret
= nvenc_dequeue_surface(ctx
->pending
, &out
);
1014 params
.version
= NV_ENC_LOCK_BITSTREAM_VER
;
1015 params
.outputBitstream
= out
->out
;
1017 ret
= nv
->nvEncLockBitstream(ctx
->nvenc_ctx
, ¶ms
);
1019 return AVERROR_UNKNOWN
;
1021 ret
= ff_alloc_packet(pkt
, params
.bitstreamSizeInBytes
);
1025 memcpy(pkt
->data
, params
.bitstreamBufferPtr
, pkt
->size
);
1027 ret
= nv
->nvEncUnlockBitstream(ctx
->nvenc_ctx
, out
->out
);
1029 return AVERROR_UNKNOWN
;
1031 out
->busy
= out
->in
->locked
= 0;
1033 ret
= nvenc_set_timestamp(ctx
, ¶ms
, pkt
);
1037 switch (params
.pictureType
) {
1038 case NV_ENC_PIC_TYPE_IDR
:
1039 pkt
->flags
|= AV_PKT_FLAG_KEY
;
1040 case NV_ENC_PIC_TYPE_INTRA_REFRESH
:
1041 case NV_ENC_PIC_TYPE_I
:
1042 avctx
->coded_frame
->pict_type
= AV_PICTURE_TYPE_I
;
1044 case NV_ENC_PIC_TYPE_P
:
1045 avctx
->coded_frame
->pict_type
= AV_PICTURE_TYPE_P
;
1047 case NV_ENC_PIC_TYPE_B
:
1048 avctx
->coded_frame
->pict_type
= AV_PICTURE_TYPE_B
;
1050 case NV_ENC_PIC_TYPE_BI
:
1051 avctx
->coded_frame
->pict_type
= AV_PICTURE_TYPE_BI
;
1058 int ff_nvenc_encode_frame(AVCodecContext
*avctx
, AVPacket
*pkt
,
1059 const AVFrame
*frame
, int *got_packet
)
1061 NVENCContext
*ctx
= avctx
->priv_data
;
1062 NV_ENCODE_API_FUNCTION_LIST
*nv
= &ctx
->nvel
.nvenc_funcs
;
1063 NV_ENC_PIC_PARAMS params
= { 0 };
1064 NVENCInputSurface
*in
= NULL
;
1065 NVENCOutputSurface
*out
= NULL
;
1068 params
.version
= NV_ENC_PIC_PARAMS_VER
;
1071 ret
= nvenc_enqueue_frame(avctx
, frame
, &in
);
1074 out
= get_output_surface(ctx
);
1080 params
.inputBuffer
= in
->in
;
1081 params
.bufferFmt
= in
->format
;
1082 params
.inputWidth
= frame
->width
;
1083 params
.inputHeight
= frame
->height
;
1084 params
.outputBitstream
= out
->out
;
1085 params
.inputTimeStamp
= frame
->pts
;
1087 if (avctx
->flags
& CODEC_FLAG_INTERLACED_DCT
) {
1088 if (frame
->top_field_first
)
1089 params
.pictureStruct
= NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM
;
1091 params
.pictureStruct
= NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP
;
1093 params
.pictureStruct
= NV_ENC_PIC_STRUCT_FRAME
;
1096 nvenc_codec_specific_pic_params(avctx
, ¶ms
);
1098 ret
= nvenc_enqueue_timestamp(ctx
->timestamps
, frame
->pts
);
1102 params
.encodePicFlags
= NV_ENC_PIC_FLAG_EOS
;
1105 ret
= nv
->nvEncEncodePicture(ctx
->nvenc_ctx
, ¶ms
);
1107 if (ret
!= NV_ENC_SUCCESS
&&
1108 ret
!= NV_ENC_ERR_NEED_MORE_INPUT
) {
1110 return AVERROR_UNKNOWN
;
1114 ret
= nvenc_enqueue_surface(ctx
->pending
, out
);
1119 if (ret
!= NV_ENC_ERR_NEED_MORE_INPUT
&&
1120 av_fifo_size(ctx
->pending
)) {
1121 ret
= nvenc_get_frame(avctx
, pkt
);