Add Haivision SRT protocol
[libav.git] / libavutil / hwcontext_cuda.c
CommitLineData
ad884d10
AK
/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19#include "buffer.h"
20#include "common.h"
21#include "hwcontext.h"
22#include "hwcontext_internal.h"
23#include "hwcontext_cuda.h"
24#include "mem.h"
25#include "pixdesc.h"
26#include "pixfmt.h"
27
/**
 * Private per-frames-context state of the CUDA hwcontext backend.
 */
typedef struct CUDAFramesContext {
    /* Chroma subsampling shifts of the software format, as reported by
     * av_pix_fmt_get_chroma_sub_sample() in cuda_frames_init(). */
    int shift_width;
    int shift_height;
} CUDAFramesContext;
31
32static const enum AVPixelFormat supported_formats[] = {
33 AV_PIX_FMT_NV12,
34 AV_PIX_FMT_YUV420P,
340f12f7 35 AV_PIX_FMT_P010,
ad884d10 36 AV_PIX_FMT_YUV444P,
340f12f7 37 AV_PIX_FMT_YUV444P16,
ad884d10
AK
38};
39
3ad82579 40static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
41 const void *hwconfig,
42 AVHWFramesConstraints *constraints)
43{
44 int i;
45
46 constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
47 sizeof(*constraints->valid_sw_formats));
48 if (!constraints->valid_sw_formats)
49 return AVERROR(ENOMEM);
50
51 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
52 constraints->valid_sw_formats[i] = supported_formats[i];
53 constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
54
55 constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
56 if (!constraints->valid_hw_formats)
57 return AVERROR(ENOMEM);
58
59 constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
60 constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
61
62 return 0;
63}
64
ad884d10
AK
65static void cuda_buffer_free(void *opaque, uint8_t *data)
66{
67 AVHWFramesContext *ctx = opaque;
68 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
69
70 CUcontext dummy;
71
72 cuCtxPushCurrent(hwctx->cuda_ctx);
73
74 cuMemFree((CUdeviceptr)data);
75
76 cuCtxPopCurrent(&dummy);
77}
78
79static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
80{
81 AVHWFramesContext *ctx = opaque;
82 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
83
84 AVBufferRef *ret = NULL;
85 CUcontext dummy = NULL;
86 CUdeviceptr data;
87 CUresult err;
88
89 err = cuCtxPushCurrent(hwctx->cuda_ctx);
90 if (err != CUDA_SUCCESS) {
91 av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
92 return NULL;
93 }
94
95 err = cuMemAlloc(&data, size);
96 if (err != CUDA_SUCCESS)
97 goto fail;
98
99 ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
100 if (!ret) {
101 cuMemFree(data);
102 goto fail;
103 }
104
105fail:
106 cuCtxPopCurrent(&dummy);
107 return ret;
108}
109
110static int cuda_frames_init(AVHWFramesContext *ctx)
111{
112 CUDAFramesContext *priv = ctx->internal->priv;
113 int i;
114
115 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
116 if (ctx->sw_format == supported_formats[i])
117 break;
118 }
119 if (i == FF_ARRAY_ELEMS(supported_formats)) {
120 av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
121 av_get_pix_fmt_name(ctx->sw_format));
122 return AVERROR(ENOSYS);
123 }
124
125 av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
126
127 if (!ctx->pool) {
128 int size;
129
130 switch (ctx->sw_format) {
131 case AV_PIX_FMT_NV12:
132 case AV_PIX_FMT_YUV420P:
133 size = ctx->width * ctx->height * 3 / 2;
134 break;
340f12f7
YKG
135 case AV_PIX_FMT_P010:
136 size = ctx->width * ctx->height * 3;
137 break;
ad884d10
AK
138 case AV_PIX_FMT_YUV444P:
139 size = ctx->width * ctx->height * 3;
140 break;
340f12f7
YKG
141 case AV_PIX_FMT_YUV444P16:
142 size = ctx->width * ctx->height * 6;
143 break;
ad884d10
AK
144 }
145
146 ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
147 if (!ctx->internal->pool_internal)
148 return AVERROR(ENOMEM);
149 }
150
151 return 0;
152}
153
154static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
155{
156 frame->buf[0] = av_buffer_pool_get(ctx->pool);
157 if (!frame->buf[0])
158 return AVERROR(ENOMEM);
159
160 switch (ctx->sw_format) {
161 case AV_PIX_FMT_NV12:
162 frame->data[0] = frame->buf[0]->data;
163 frame->data[1] = frame->data[0] + ctx->width * ctx->height;
164 frame->linesize[0] = ctx->width;
165 frame->linesize[1] = ctx->width;
166 break;
167 case AV_PIX_FMT_YUV420P:
168 frame->data[0] = frame->buf[0]->data;
169 frame->data[2] = frame->data[0] + ctx->width * ctx->height;
170 frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4;
171 frame->linesize[0] = ctx->width;
172 frame->linesize[1] = ctx->width / 2;
173 frame->linesize[2] = ctx->width / 2;
174 break;
340f12f7
YKG
175 case AV_PIX_FMT_P010:
176 frame->data[0] = frame->buf[0]->data;
177 frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
178 frame->linesize[0] = 2 * ctx->width;
179 frame->linesize[1] = 2 * ctx->width;
180 break;
ad884d10
AK
181 case AV_PIX_FMT_YUV444P:
182 frame->data[0] = frame->buf[0]->data;
183 frame->data[1] = frame->data[0] + ctx->width * ctx->height;
184 frame->data[2] = frame->data[1] + ctx->width * ctx->height;
185 frame->linesize[0] = ctx->width;
186 frame->linesize[1] = ctx->width;
187 frame->linesize[2] = ctx->width;
188 break;
340f12f7
YKG
189 case AV_PIX_FMT_YUV444P16:
190 frame->data[0] = frame->buf[0]->data;
191 frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
192 frame->data[2] = frame->data[1] + 2 * ctx->width * ctx->height;
193 frame->linesize[0] = 2 * ctx->width;
194 frame->linesize[1] = 2 * ctx->width;
195 frame->linesize[2] = 2 * ctx->width;
196 break;
ad884d10
AK
197 default:
198 av_frame_unref(frame);
199 return AVERROR_BUG;
200 }
201
202 frame->format = AV_PIX_FMT_CUDA;
203 frame->width = ctx->width;
204 frame->height = ctx->height;
205
206 return 0;
207}
208
209static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
210 enum AVHWFrameTransferDirection dir,
211 enum AVPixelFormat **formats)
212{
213 enum AVPixelFormat *fmts;
214
215 fmts = av_malloc_array(2, sizeof(*fmts));
216 if (!fmts)
217 return AVERROR(ENOMEM);
218
219 fmts[0] = ctx->sw_format;
220 fmts[1] = AV_PIX_FMT_NONE;
221
222 *formats = fmts;
223
224 return 0;
225}
226
227static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
228 const AVFrame *src)
229{
230 CUDAFramesContext *priv = ctx->internal->priv;
231 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
232
233 CUcontext dummy;
234 CUresult err;
235 int i;
236
237 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
238 if (err != CUDA_SUCCESS)
239 return AVERROR_UNKNOWN;
240
241 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
242 CUDA_MEMCPY2D cpy = {
243 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
244 .dstMemoryType = CU_MEMORYTYPE_HOST,
245 .srcDevice = (CUdeviceptr)src->data[i],
246 .dstHost = dst->data[i],
247 .srcPitch = src->linesize[i],
248 .dstPitch = dst->linesize[i],
249 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
250 .Height = src->height >> (i ? priv->shift_height : 0),
251 };
252
253 err = cuMemcpy2D(&cpy);
254 if (err != CUDA_SUCCESS) {
255 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
256 return AVERROR_UNKNOWN;
257 }
258 }
259
260 cuCtxPopCurrent(&dummy);
261
262 return 0;
263}
264
265static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
266 const AVFrame *src)
267{
268 CUDAFramesContext *priv = ctx->internal->priv;
269 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
270
271 CUcontext dummy;
272 CUresult err;
273 int i;
274
275 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
276 if (err != CUDA_SUCCESS)
277 return AVERROR_UNKNOWN;
278
279 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
280 CUDA_MEMCPY2D cpy = {
281 .srcMemoryType = CU_MEMORYTYPE_HOST,
282 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
283 .srcHost = src->data[i],
284 .dstDevice = (CUdeviceptr)dst->data[i],
285 .srcPitch = src->linesize[i],
286 .dstPitch = dst->linesize[i],
287 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
288 .Height = src->height >> (i ? priv->shift_height : 0),
289 };
290
291 err = cuMemcpy2D(&cpy);
292 if (err != CUDA_SUCCESS) {
293 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
294 return AVERROR_UNKNOWN;
295 }
296 }
297
298 cuCtxPopCurrent(&dummy);
299
300 return 0;
301}
302
2e219b49
AK
303static void cuda_device_free(AVHWDeviceContext *ctx)
304{
305 AVCUDADeviceContext *hwctx = ctx->hwctx;
306 cuCtxDestroy(hwctx->cuda_ctx);
307}
308
309static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
310 AVDictionary *opts, int flags)
311{
312 AVCUDADeviceContext *hwctx = ctx->hwctx;
313 CUdevice cu_device;
314 CUcontext dummy;
315 CUresult err;
316 int device_idx = 0;
317
318 if (device)
319 device_idx = strtol(device, NULL, 0);
320
321 err = cuInit(0);
322 if (err != CUDA_SUCCESS) {
323 av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
324 return AVERROR_UNKNOWN;
325 }
326
327 err = cuDeviceGet(&cu_device, device_idx);
328 if (err != CUDA_SUCCESS) {
329 av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
330 return AVERROR_UNKNOWN;
331 }
332
333 err = cuCtxCreate(&hwctx->cuda_ctx, 0, cu_device);
334 if (err != CUDA_SUCCESS) {
335 av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
336 return AVERROR_UNKNOWN;
337 }
338
339 cuCtxPopCurrent(&dummy);
340
341 ctx->free = cuda_device_free;
342
343 return 0;
344}
345
ad884d10
AK
346const HWContextType ff_hwcontext_type_cuda = {
347 .type = AV_HWDEVICE_TYPE_CUDA,
348 .name = "CUDA",
349
350 .device_hwctx_size = sizeof(AVCUDADeviceContext),
351 .frames_priv_size = sizeof(CUDAFramesContext),
352
2e219b49 353 .device_create = cuda_device_create,
3ad82579 354 .frames_get_constraints = cuda_frames_get_constraints,
ad884d10
AK
355 .frames_init = cuda_frames_init,
356 .frames_get_buffer = cuda_get_buffer,
357 .transfer_get_formats = cuda_transfer_get_formats,
358 .transfer_data_to = cuda_transfer_data_to,
359 .transfer_data_from = cuda_transfer_data_from,
360
361 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
362};