2 * This file is part of Libav.
4 * Libav is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * Libav is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with Libav; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "libavutil/avstring.h"
29 #include "libavutil/common.h"
30 #include "libavutil/eval.h"
31 #include "libavutil/hwcontext.h"
32 #include "libavutil/hwcontext_cuda.h"
33 #include "libavutil/internal.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/opt.h"
36 #include "libavutil/pixdesc.h"
/* Software pixel formats this filter accepts on input/output
 * (table entries are truncated in this view). */
43 static const enum AVPixelFormat supported_formats
[] = {
/* Maps an interleaved-chroma format to its fully planar equivalent
 * (e.g. NV12 -> YUV420P); NPP resizing works on separate planes. */
49 static const enum AVPixelFormat deinterleaved_formats
[][2] = {
50 { AV_PIX_FMT_NV12
, AV_PIX_FMT_YUV420P
},
/* Variable names usable inside the "w"/"h" option expressions. */
53 static const char *const var_names
[] = {
/* State for one stage of the processing chain
 * (deinterleave -> resize -> interleave). */
86 typedef struct NPPScaleStageContext
{
/* sw pixel format consumed by this stage */
88 enum AVPixelFormat in_fmt
;
/* sw pixel format produced by this stage */
89 enum AVPixelFormat out_fmt
;
/* per-plane dimensions at the input and output of the stage */
94 } planes_in
[3], planes_out
[3];
/* hw frames context backing this stage's output frame */
96 AVBufferRef
*frames_ctx
;
98 } NPPScaleStageContext
;
/* Private context of the scale_npp filter. */
100 typedef struct NPPScaleContext
{
101 const AVClass
*class;
/* one entry per STAGE_* value; unneeded stages are skipped at runtime */
103 NPPScaleStageContext stages
[STAGE_NB
];
107 int shift_width
, shift_height
;
110 * New dimensions. Special values are:
111 * 0 = original width/height
112 * -1 = keep original aspect
117 * Output sw format. AV_PIX_FMT_NONE for no conversion.
119 enum AVPixelFormat format
;
121 char *w_expr
; ///< width expression string
122 char *h_expr
; ///< height expression string
/* Filter init callback: parse the requested output sw format and
 * allocate the per-stage work frames plus the temporary swap frame.
 * Returns 0 on success or a negative AVERROR code. */
128 static int nppscale_init(AVFilterContext
*ctx
)
130 NPPScaleContext
*s
= ctx
->priv
;
/* "same" means no format conversion: keep the input sw format */
133 if (!strcmp(s
->format_str
, "same")) {
134 s
->format
= AV_PIX_FMT_NONE
;
136 s
->format
= av_get_pix_fmt(s
->format_str
);
137 if (s
->format
== AV_PIX_FMT_NONE
) {
138 av_log(ctx
, AV_LOG_ERROR
, "Unrecognized pixel format: %s\n", s
->format_str
);
139 return AVERROR(EINVAL
);
/* one reusable output frame per processing stage */
143 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->stages
); i
++) {
144 s
->stages
[i
].frame
= av_frame_alloc();
145 if (!s
->stages
[i
].frame
)
146 return AVERROR(ENOMEM
);
/* scratch frame used to swap buffers after the last stage */
148 s
->tmp_frame
= av_frame_alloc();
150 return AVERROR(ENOMEM
);
/* Filter uninit callback: release every per-stage frame and hw frames
 * context, and the temporary swap frame. Safe on partially
 * initialized state (av_frame_free/av_buffer_unref accept NULL). */
155 static void nppscale_uninit(AVFilterContext
*ctx
)
157 NPPScaleContext
*s
= ctx
->priv
;
160 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->stages
); i
++) {
161 av_frame_free(&s
->stages
[i
].frame
);
162 av_buffer_unref(&s
->stages
[i
].frames_ctx
);
164 av_frame_free(&s
->tmp_frame
);
/* Only hardware CUDA frames are supported on both links. */
167 static int nppscale_query_formats(AVFilterContext
*ctx
)
169 static const enum AVPixelFormat pixel_formats
[] = {
170 AV_PIX_FMT_CUDA
, AV_PIX_FMT_NONE
,
172 AVFilterFormats
*pix_fmts
= ff_make_format_list(pixel_formats
);
174 ff_set_common_formats(ctx
, pix_fmts
);
/* Prepare one processing stage: derive per-plane dimensions from the
 * chroma subsampling of the stage's in/out formats, create a CUDA
 * hw frames context for the output and allocate the stage's output
 * frame from it. Returns 0 on success, negative AVERROR otherwise. */
179 static int init_stage(NPPScaleStageContext
*stage
, AVBufferRef
*device_ctx
)
181 AVBufferRef
*out_ref
= NULL
;
182 AVHWFramesContext
*out_ctx
;
183 int in_sw
, in_sh
, out_sw
, out_sh
;
186 av_pix_fmt_get_chroma_sub_sample(stage
->in_fmt
, &in_sw
, &in_sh
);
187 av_pix_fmt_get_chroma_sub_sample(stage
->out_fmt
, &out_sw
, &out_sh
);
/* no explicit output size set -> pass the input dimensions through */
188 if (!stage
->planes_out
[0].width
) {
189 stage
->planes_out
[0].width
= stage
->planes_in
[0].width
;
190 stage
->planes_out
[0].height
= stage
->planes_in
[0].height
;
/* chroma planes are the luma plane size shifted by the subsampling */
193 for (i
= 1; i
< FF_ARRAY_ELEMS(stage
->planes_in
); i
++) {
194 stage
->planes_in
[i
].width
= stage
->planes_in
[0].width
>> in_sw
;
195 stage
->planes_in
[i
].height
= stage
->planes_in
[0].height
>> in_sh
;
196 stage
->planes_out
[i
].width
= stage
->planes_out
[0].width
>> out_sw
;
197 stage
->planes_out
[i
].height
= stage
->planes_out
[0].height
>> out_sh
;
200 out_ref
= av_hwframe_ctx_alloc(device_ctx
);
202 return AVERROR(ENOMEM
);
203 out_ctx
= (AVHWFramesContext
*)out_ref
->data
;
205 out_ctx
->format
= AV_PIX_FMT_CUDA
;
206 out_ctx
->sw_format
= stage
->out_fmt
;
/* allocated surface dimensions are padded to a multiple of 32 */
207 out_ctx
->width
= FFALIGN(stage
->planes_out
[0].width
, 32);
208 out_ctx
->height
= FFALIGN(stage
->planes_out
[0].height
, 32);
210 ret
= av_hwframe_ctx_init(out_ref
);
214 av_frame_unref(stage
->frame
);
215 ret
= av_hwframe_get_buffer(out_ref
, stage
->frame
, 0);
/* report the real (unpadded) output dimensions on the frame */
219 stage
->frame
->width
= stage
->planes_out
[0].width
;
220 stage
->frame
->height
= stage
->planes_out
[0].height
;
/* stage takes ownership of the new frames context */
222 av_buffer_unref(&stage
->frames_ctx
);
223 stage
->frames_ctx
= out_ref
;
/* error path: drop the partially constructed frames context */
227 av_buffer_unref(&out_ref
);
/* Linear search of fmt in the supported_formats table. */
231 static int format_is_supported(enum AVPixelFormat fmt
)
235 for (i
= 0; i
< FF_ARRAY_ELEMS(supported_formats
); i
++)
236 if (supported_formats
[i
] == fmt
)
/* Map fmt to its fully planar equivalent via deinterleaved_formats
 * (e.g. NV12 -> YUV420P); returns AV_PIX_FMT_NONE when no mapping
 * exists for an interleaved format. */
241 static enum AVPixelFormat
get_deinterleaved_format(enum AVPixelFormat fmt
)
243 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(fmt
);
246 planes
= av_pix_fmt_count_planes(fmt
);
/* one plane per component: the format is already fully planar */
247 if (planes
== desc
->nb_components
)
249 for (i
= 0; i
< FF_ARRAY_ELEMS(deinterleaved_formats
); i
++)
250 if (deinterleaved_formats
[i
][0] == fmt
)
251 return deinterleaved_formats
[i
][1];
252 return AV_PIX_FMT_NONE
;
/* Decide which of the three stages (deinterleave, resize, interleave)
 * are needed for the requested conversion, fill in their formats and
 * root plane sizes, initialize each needed stage's hw frames context,
 * and propagate the last stage's frames context to the output link.
 * Returns 0 on success or a negative AVERROR code. */
255 static int init_processing_chain(AVFilterContext
*ctx
, int in_width
, int in_height
,
256 int out_width
, int out_height
)
258 NPPScaleContext
*s
= ctx
->priv
;
260 AVHWFramesContext
*in_frames_ctx
;
262 enum AVPixelFormat in_format
;
263 enum AVPixelFormat out_format
;
264 enum AVPixelFormat in_deinterleaved_format
;
265 enum AVPixelFormat out_deinterleaved_format
;
267 int i
, ret
, last_stage
= -1;
269 /* check that we have a hw context */
270 if (!ctx
->inputs
[0]->hw_frames_ctx
) {
271 av_log(ctx
, AV_LOG_ERROR
, "No hw context provided on input\n");
272 return AVERROR(EINVAL
);
274 in_frames_ctx
= (AVHWFramesContext
*)ctx
->inputs
[0]->hw_frames_ctx
->data
;
275 in_format
= in_frames_ctx
->sw_format
;
/* AV_PIX_FMT_NONE means "same": keep the input sw format */
276 out_format
= (s
->format
== AV_PIX_FMT_NONE
) ? in_format
: s
->format
;
278 if (!format_is_supported(in_format
)) {
279 av_log(ctx
, AV_LOG_ERROR
, "Unsupported input format: %s\n",
280 av_get_pix_fmt_name(in_format
));
281 return AVERROR(ENOSYS
);
283 if (!format_is_supported(out_format
)) {
284 av_log(ctx
, AV_LOG_ERROR
, "Unsupported output format: %s\n",
285 av_get_pix_fmt_name(out_format
));
286 return AVERROR(ENOSYS
);
289 in_deinterleaved_format
= get_deinterleaved_format(in_format
);
290 out_deinterleaved_format
= get_deinterleaved_format(out_format
);
291 if (in_deinterleaved_format
== AV_PIX_FMT_NONE
||
292 out_deinterleaved_format
== AV_PIX_FMT_NONE
)
295 /* figure out which stages need to be done */
296 if (in_width
!= out_width
|| in_height
!= out_height
||
297 in_deinterleaved_format
!= out_deinterleaved_format
)
298 s
->stages
[STAGE_RESIZE
].stage_needed
= 1;
/* nothing to resize and nothing to convert -> passthrough candidate */
300 if (!s
->stages
[STAGE_RESIZE
].stage_needed
&& in_format
== out_format
)
303 if (!s
->passthrough
) {
304 if (in_format
!= in_deinterleaved_format
)
305 s
->stages
[STAGE_DEINTERLEAVE
].stage_needed
= 1;
306 if (out_format
!= out_deinterleaved_format
)
307 s
->stages
[STAGE_INTERLEAVE
].stage_needed
= 1;
/* wire up the format/size chain between the three stages */
310 s
->stages
[STAGE_DEINTERLEAVE
].in_fmt
= in_format
;
311 s
->stages
[STAGE_DEINTERLEAVE
].out_fmt
= in_deinterleaved_format
;
312 s
->stages
[STAGE_DEINTERLEAVE
].planes_in
[0].width
= in_width
;
313 s
->stages
[STAGE_DEINTERLEAVE
].planes_in
[0].height
= in_height
;
315 s
->stages
[STAGE_RESIZE
].in_fmt
= in_deinterleaved_format
;
316 s
->stages
[STAGE_RESIZE
].out_fmt
= out_deinterleaved_format
;
317 s
->stages
[STAGE_RESIZE
].planes_in
[0].width
= in_width
;
318 s
->stages
[STAGE_RESIZE
].planes_in
[0].height
= in_height
;
319 s
->stages
[STAGE_RESIZE
].planes_out
[0].width
= out_width
;
320 s
->stages
[STAGE_RESIZE
].planes_out
[0].height
= out_height
;
322 s
->stages
[STAGE_INTERLEAVE
].in_fmt
= out_deinterleaved_format
;
323 s
->stages
[STAGE_INTERLEAVE
].out_fmt
= out_format
;
324 s
->stages
[STAGE_INTERLEAVE
].planes_in
[0].width
= out_width
;
325 s
->stages
[STAGE_INTERLEAVE
].planes_in
[0].height
= out_height
;
327 /* init the hardware contexts */
328 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->stages
); i
++) {
329 if (!s
->stages
[i
].stage_needed
)
332 ret
= init_stage(&s
->stages
[i
], in_frames_ctx
->device_ref
);
/* the last active stage's frames context becomes the output's */
341 ctx
->outputs
[0]->hw_frames_ctx
= av_buffer_ref(s
->stages
[last_stage
].frames_ctx
);
342 if (!ctx
->outputs
[0]->hw_frames_ctx
)
343 return AVERROR(ENOMEM
);
/* Output link config callback: evaluate the "w"/"h" option expressions
 * against the input link's properties, resolve the 0/-1 special
 * values, sanity-check the result, adjust the output sample aspect
 * ratio and build the processing chain. */
348 static int nppscale_config_props(AVFilterLink
*outlink
)
350 AVFilterContext
*ctx
= outlink
->src
;
351 AVFilterLink
*inlink
= outlink
->src
->inputs
[0];
352 NPPScaleContext
*s
= ctx
->priv
;
354 double var_values
[VARS_NB
], res
;
/* expose constants and input geometry to the expressions */
358 var_values
[VAR_PI
] = M_PI
;
359 var_values
[VAR_PHI
] = M_PHI
;
360 var_values
[VAR_E
] = M_E
;
361 var_values
[VAR_IN_W
] = var_values
[VAR_IW
] = inlink
->w
;
362 var_values
[VAR_IN_H
] = var_values
[VAR_IH
] = inlink
->h
;
363 var_values
[VAR_OUT_W
] = var_values
[VAR_OW
] = NAN
;
364 var_values
[VAR_OUT_H
] = var_values
[VAR_OH
] = NAN
;
365 var_values
[VAR_A
] = (double) inlink
->w
/ inlink
->h
;
366 var_values
[VAR_SAR
] = inlink
->sample_aspect_ratio
.num ?
367 (double) inlink
->sample_aspect_ratio
.num
/ inlink
->sample_aspect_ratio
.den
: 1;
368 var_values
[VAR_DAR
] = var_values
[VAR_A
] * var_values
[VAR_SAR
];
370 /* evaluate width and height */
371 av_expr_parse_and_eval(&res
, (expr
= s
->w_expr
),
372 var_names
, var_values
,
373 NULL
, NULL
, NULL
, NULL
, NULL
, 0, ctx
);
374 s
->w
= var_values
[VAR_OUT_W
] = var_values
[VAR_OW
] = res
;
375 if ((ret
= av_expr_parse_and_eval(&res
, (expr
= s
->h_expr
),
376 var_names
, var_values
,
377 NULL
, NULL
, NULL
, NULL
, NULL
, 0, ctx
)) < 0)
379 s
->h
= var_values
[VAR_OUT_H
] = var_values
[VAR_OH
] = res
;
380 /* evaluate again the width, as it may depend on the output height */
381 if ((ret
= av_expr_parse_and_eval(&res
, (expr
= s
->w_expr
),
382 var_names
, var_values
,
383 NULL
, NULL
, NULL
, NULL
, NULL
, 0, ctx
)) < 0)
390 /* sanity check params */
391 if (w
< -1 || h
< -1) {
392 av_log(ctx
, AV_LOG_ERROR
, "Size values less than -1 are not acceptable.\n");
393 return AVERROR(EINVAL
);
395 if (w
== -1 && h
== -1)
/* -1: derive the dimension from the other one, keeping input aspect */
403 w
= av_rescale(h
, inlink
->w
, inlink
->h
);
405 h
= av_rescale(w
, inlink
->h
, inlink
->w
);
407 if (w
> INT_MAX
|| h
> INT_MAX
||
408 (h
* inlink
->w
) > INT_MAX
||
409 (w
* inlink
->h
) > INT_MAX
)
410 av_log(ctx
, AV_LOG_ERROR
, "Rescaled value for width or height is too big.\n");
415 ret
= init_processing_chain(ctx
, inlink
->w
, inlink
->h
, w
, h
);
419 av_log(ctx
, AV_LOG_VERBOSE
, "w:%d h:%d -> w:%d h:%d\n",
420 inlink
->w
, inlink
->h
, outlink
->w
, outlink
->h
);
/* scale the SAR so the display aspect ratio is preserved */
422 if (inlink
->sample_aspect_ratio
.num
)
423 outlink
->sample_aspect_ratio
= av_mul_q((AVRational
){outlink
->h
*inlink
->w
,
424 outlink
->w
*inlink
->h
},
425 inlink
->sample_aspect_ratio
);
427 outlink
->sample_aspect_ratio
= inlink
->sample_aspect_ratio
;
/* report which expression failed to evaluate */
432 av_log(NULL
, AV_LOG_ERROR
,
433 "Error when evaluating the expression '%s'\n", expr
);
/* Stage callback: split interleaved chroma into separate planes with
 * NPP (NV12 -> planar 4:2:0). Only NV12 is handled by this switch.
 * Returns 0 on success, AVERROR_UNKNOWN on an NPP failure. */
437 static int nppscale_deinterleave(AVFilterContext
*ctx
, NPPScaleStageContext
*stage
,
438 AVFrame
*out
, AVFrame
*in
)
440 AVHWFramesContext
*in_frames_ctx
= (AVHWFramesContext
*)in
->hw_frames_ctx
->data
;
443 switch (in_frames_ctx
->sw_format
) {
444 case AV_PIX_FMT_NV12
:
445 err
= nppiYCbCr420_8u_P2P3R(in
->data
[0], in
->linesize
[0],
446 in
->data
[1], in
->linesize
[1],
447 out
->data
, out
->linesize
,
448 (NppiSize
){ in
->width
, in
->height
});
453 if (err
!= NPP_SUCCESS
) {
454 av_log(ctx
, AV_LOG_ERROR
, "NPP deinterleave error: %d\n", err
);
455 return AVERROR_UNKNOWN
;
/* Stage callback: resize each present plane independently with
 * nppiResizeSqrPixel, using the interpolation algorithm selected by
 * the interp_algo option. Scale factors are derived per plane from
 * the stage's in/out plane dimensions. */
461 static int nppscale_resize(AVFilterContext
*ctx
, NPPScaleStageContext
*stage
,
462 AVFrame
*out
, AVFrame
*in
)
464 NPPScaleContext
*s
= ctx
->priv
;
/* iterate only over planes that actually carry data */
468 for (i
= 0; i
< FF_ARRAY_ELEMS(in
->data
) && in
->data
[i
]; i
++) {
469 int iw
= stage
->planes_in
[i
].width
;
470 int ih
= stage
->planes_in
[i
].height
;
471 int ow
= stage
->planes_out
[i
].width
;
472 int oh
= stage
->planes_out
[i
].height
;
474 err
= nppiResizeSqrPixel_8u_C1R(in
->data
[i
], (NppiSize
){ iw
, ih
},
475 in
->linesize
[i
], (NppiRect
){ 0, 0, iw
, ih
},
476 out
->data
[i
], out
->linesize
[i
],
477 (NppiRect
){ 0, 0, ow
, oh
},
478 (double)ow
/ iw
, (double)oh
/ ih
,
479 0.0, 0.0, s
->interp_algo
);
480 if (err
!= NPP_SUCCESS
) {
481 av_log(ctx
, AV_LOG_ERROR
, "NPP resize error: %d\n", err
);
482 return AVERROR_UNKNOWN
;
489 static int nppscale_interleave(AVFilterContext
*ctx
, NPPScaleStageContext
*stage
,
490 AVFrame
*out
, AVFrame
*in
)
492 AVHWFramesContext
*out_frames_ctx
= (AVHWFramesContext
*)out
->hw_frames_ctx
->data
;
495 switch (out_frames_ctx
->sw_format
) {
496 case AV_PIX_FMT_NV12
:
497 err
= nppiYCbCr420_8u_P3P2R((const uint8_t**)in
->data
,
499 out
->data
[0], out
->linesize
[0],
500 out
->data
[1], out
->linesize
[1],
501 (NppiSize
){ in
->width
, in
->height
});
506 if (err
!= NPP_SUCCESS
) {
507 av_log(ctx
, AV_LOG_ERROR
, "NPP deinterleave error: %d\n", err
);
508 return AVERROR_UNKNOWN
;
/* Per-stage processing callbacks, indexed by the STAGE_* enum;
 * dispatched from nppscale_scale() for each needed stage. */
514 static int (*const nppscale_process
[])(AVFilterContext
*ctx
, NPPScaleStageContext
*stage
,
515 AVFrame
*out
, AVFrame
*in
) = {
516 [STAGE_DEINTERLEAVE
] = nppscale_deinterleave
,
517 [STAGE_RESIZE
] = nppscale_resize
,
518 [STAGE_INTERLEAVE
] = nppscale_interleave
,
/* Run every needed stage in order, chaining each stage's output frame
 * into the next stage's input. The final result is moved into out,
 * and the last stage gets a fresh hw buffer via tmp_frame so its
 * frame stays usable for the next invocation. */
521 static int nppscale_scale(AVFilterContext
*ctx
, AVFrame
*out
, AVFrame
*in
)
523 NPPScaleContext
*s
= ctx
->priv
;
525 int i
, ret
, last_stage
= -1;
527 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->stages
); i
++) {
528 if (!s
->stages
[i
].stage_needed
)
531 ret
= nppscale_process
[i
](ctx
, &s
->stages
[i
], s
->stages
[i
].frame
, src
);
/* this stage's output becomes the next stage's input */
535 src
= s
->stages
[i
].frame
;
/* replace the buffer we are about to hand out with a fresh one */
541 ret
= av_hwframe_get_buffer(src
->hw_frames_ctx
, s
->tmp_frame
, 0);
545 av_frame_move_ref(out
, src
);
546 av_frame_move_ref(src
, s
->tmp_frame
);
/* carry over timestamps/metadata from the input frame */
548 ret
= av_frame_copy_props(out
, in
);
/* Input pad filter_frame callback: allocate an output frame, fix up
 * its sample aspect ratio for the new dimensions, run the NPP chain
 * inside the frame's CUDA context, and forward the result downstream. */
555 static int nppscale_filter_frame(AVFilterLink
*link
, AVFrame
*in
)
557 AVFilterContext
*ctx
= link
->dst
;
558 NPPScaleContext
*s
= ctx
->priv
;
559 AVFilterLink
*outlink
= ctx
->outputs
[0];
560 AVHWFramesContext
*frames_ctx
= (AVHWFramesContext
*)outlink
->hw_frames_ctx
->data
;
561 AVCUDADeviceContext
*device_hwctx
= frames_ctx
->device_ctx
->hwctx
;
/* passthrough: hand the input frame on unchanged */
569 return ff_filter_frame(outlink
, in
);
571 out
= av_frame_alloc();
573 ret
= AVERROR(ENOMEM
);
/* rescale the SAR so the display aspect ratio stays the same */
577 av_reduce(&out
->sample_aspect_ratio
.num
, &out
->sample_aspect_ratio
.den
,
578 (int64_t)in
->sample_aspect_ratio
.num
* outlink
->h
* link
->w
,
579 (int64_t)in
->sample_aspect_ratio
.den
* outlink
->w
* link
->h
,
/* make the filter's CUDA context current for the NPP calls */
582 err
= cuCtxPushCurrent(device_hwctx
->cuda_ctx
);
583 if (err
!= CUDA_SUCCESS
) {
584 ret
= AVERROR_UNKNOWN
;
588 ret
= nppscale_scale(ctx
, out
, in
);
590 cuCtxPopCurrent(&dummy
);
595 return ff_filter_frame(outlink
, out
);
602 #define OFFSET(x) offsetof(NPPScaleContext, x)
603 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM
/* User-visible options: output size expressions, target sw format,
 * and the NPP interpolation algorithm with its named constants. */
604 static const AVOption options
[] = {
605 { "w", "Output video width", OFFSET(w_expr
), AV_OPT_TYPE_STRING
, { .str
= "iw" }, .flags
= FLAGS
},
606 { "h", "Output video height", OFFSET(h_expr
), AV_OPT_TYPE_STRING
, { .str
= "ih" }, .flags
= FLAGS
},
607 { "format", "Output pixel format", OFFSET(format_str
), AV_OPT_TYPE_STRING
, { .str
= "same" }, .flags
= FLAGS
},
609 { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo
), AV_OPT_TYPE_INT
, { .i64
= NPPI_INTER_CUBIC
}, 0, INT_MAX
, FLAGS
, "interp_algo" },
610 { "nn", "nearest neighbour", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_NN
}, 0, 0, FLAGS
, "interp_algo" },
611 { "linear", "linear", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_LINEAR
}, 0, 0, FLAGS
, "interp_algo" },
612 { "cubic", "cubic", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_CUBIC
}, 0, 0, FLAGS
, "interp_algo" },
613 { "cubic2p_bspline", "2-parameter cubic (B=1, C=0)", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_CUBIC2P_BSPLINE
}, 0, 0, FLAGS
, "interp_algo" },
614 { "cubic2p_catmullrom", "2-parameter cubic (B=0, C=1/2)", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_CUBIC2P_CATMULLROM
}, 0, 0, FLAGS
, "interp_algo" },
615 { "cubic2p_b05c03", "2-parameter cubic (B=1/2, C=3/10)", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_CUBIC2P_B05C03
}, 0, 0, FLAGS
, "interp_algo" },
616 { "super", "supersampling", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_SUPER
}, 0, 0, FLAGS
, "interp_algo" },
617 { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST
, { .i64
= NPPI_INTER_LANCZOS
}, 0, 0, FLAGS
, "interp_algo" },
/* AVClass binding the option table for logging and AVOption access. */
621 static const AVClass nppscale_class
= {
622 .class_name
= "nppscale",
623 .item_name
= av_default_item_name
,
625 .version
= LIBAVUTIL_VERSION_INT
,
/* single video input, frames handled by nppscale_filter_frame */
628 static const AVFilterPad nppscale_inputs
[] = {
631 .type
= AVMEDIA_TYPE_VIDEO
,
632 .filter_frame
= nppscale_filter_frame
,
/* single video output, configured by nppscale_config_props */
637 static const AVFilterPad nppscale_outputs
[] = {
640 .type
= AVMEDIA_TYPE_VIDEO
,
641 .config_props
= nppscale_config_props
,
/* Filter registration: NPP-accelerated scaling and pixel format
 * conversion for CUDA hw frames. */
646 AVFilter ff_vf_scale_npp
= {
648 .description
= NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video "
649 "scaling and format conversion"),
651 .init
= nppscale_init
,
652 .uninit
= nppscale_uninit
,
653 .query_formats
= nppscale_query_formats
,
655 .priv_size
= sizeof(NPPScaleContext
),
656 .priv_class
= &nppscale_class
,
658 .inputs
= nppscale_inputs
,
659 .outputs
= nppscale_outputs
,