h264_metadata: Add option to delete filler data
[libav.git] / libavcodec / h264_metadata_bsf.c
CommitLineData
9e93001b
MT
/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19#include "libavutil/avstring.h"
20#include "libavutil/common.h"
21#include "libavutil/opt.h"
22
23#include "bsf.h"
24#include "cbs.h"
25#include "cbs_h264.h"
26#include "h264.h"
27#include "h264_sei.h"
28
/* Values accepted by the "aud" option. */
enum {
    PASS   = 0,
    INSERT = 1,
    REMOVE = 2,
};
34
35typedef struct H264MetadataContext {
36 const AVClass *class;
37
26513529 38 CodedBitstreamContext *cbc;
9e93001b
MT
39 CodedBitstreamFragment access_unit;
40
41 H264RawAUD aud_nal;
42 H264RawSEI sei_nal;
43
44 int aud;
45
46 AVRational sample_aspect_ratio;
47
48 int video_format;
49 int video_full_range_flag;
50 int colour_primaries;
51 int transfer_characteristics;
52 int matrix_coefficients;
53
54 int chroma_sample_loc_type;
55
56 AVRational tick_rate;
57 int fixed_frame_rate_flag;
58
59 int crop_left;
60 int crop_right;
61 int crop_top;
62 int crop_bottom;
63
64 const char *sei_user_data;
78fa0b90 65 int sei_first_au;
6d5a6dde
MT
66
67 int delete_filler;
9e93001b
MT
68} H264MetadataContext;
69
70
71static int h264_metadata_update_sps(AVBSFContext *bsf,
72 H264RawSPS *sps)
73{
74 H264MetadataContext *ctx = bsf->priv_data;
75 int need_vui = 0;
76 int crop_unit_x, crop_unit_y;
77
78 if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
79 // Table E-1.
80 static const AVRational sar_idc[] = {
81 { 0, 0 }, // Unspecified (never written here).
82 { 1, 1 }, { 12, 11 }, { 10, 11 }, { 16, 11 },
83 { 40, 33 }, { 24, 11 }, { 20, 11 }, { 32, 11 },
84 { 80, 33 }, { 18, 11 }, { 15, 11 }, { 64, 33 },
85 { 160, 99 }, { 4, 3 }, { 3, 2 }, { 2, 1 },
86 };
87 int num, den, i;
88
89 av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
90 ctx->sample_aspect_ratio.den, 65535);
91
92 for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
93 if (num == sar_idc[i].num &&
94 den == sar_idc[i].den)
95 break;
96 }
97 if (i == FF_ARRAY_ELEMS(sar_idc)) {
98 sps->vui.aspect_ratio_idc = 255;
99 sps->vui.sar_width = num;
100 sps->vui.sar_height = den;
101 } else {
102 sps->vui.aspect_ratio_idc = i;
103 }
104 sps->vui.aspect_ratio_info_present_flag = 1;
105 need_vui = 1;
106 }
107
108#define SET_OR_INFER(field, value, present_flag, infer) do { \
109 if (value >= 0) { \
110 field = value; \
111 need_vui = 1; \
112 } else if (!present_flag) \
113 field = infer; \
114 } while (0)
115
116 if (ctx->video_format >= 0 ||
117 ctx->video_full_range_flag >= 0 ||
118 ctx->colour_primaries >= 0 ||
119 ctx->transfer_characteristics >= 0 ||
120 ctx->matrix_coefficients >= 0) {
121
122 SET_OR_INFER(sps->vui.video_format, ctx->video_format,
123 sps->vui.video_signal_type_present_flag, 5);
124
125 SET_OR_INFER(sps->vui.video_full_range_flag,
126 ctx->video_full_range_flag,
127 sps->vui.video_signal_type_present_flag, 0);
128
129 if (ctx->colour_primaries >= 0 ||
130 ctx->transfer_characteristics >= 0 ||
131 ctx->matrix_coefficients >= 0) {
132
133 SET_OR_INFER(sps->vui.colour_primaries,
134 ctx->colour_primaries,
135 sps->vui.colour_description_present_flag, 2);
136
137 SET_OR_INFER(sps->vui.transfer_characteristics,
138 ctx->transfer_characteristics,
139 sps->vui.colour_description_present_flag, 2);
140
141 SET_OR_INFER(sps->vui.matrix_coefficients,
142 ctx->matrix_coefficients,
143 sps->vui.colour_description_present_flag, 2);
144
145 sps->vui.colour_description_present_flag = 1;
146 }
147 sps->vui.video_signal_type_present_flag = 1;
148 need_vui = 1;
149 }
150
151 if (ctx->chroma_sample_loc_type >= 0) {
152 sps->vui.chroma_sample_loc_type_top_field =
153 ctx->chroma_sample_loc_type;
154 sps->vui.chroma_sample_loc_type_bottom_field =
155 ctx->chroma_sample_loc_type;
156 sps->vui.chroma_loc_info_present_flag = 1;
157 need_vui = 1;
158 }
159
160 if (ctx->tick_rate.num && ctx->tick_rate.den) {
161 int num, den;
162
163 av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
164 UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
165
166 sps->vui.time_scale = num;
167 sps->vui.num_units_in_tick = den;
168
169 sps->vui.timing_info_present_flag = 1;
170 need_vui = 1;
171 }
172 SET_OR_INFER(sps->vui.fixed_frame_rate_flag,
173 ctx->fixed_frame_rate_flag,
174 sps->vui.timing_info_present_flag, 0);
175
176 if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
177 crop_unit_x = 1;
178 crop_unit_y = 2 - sps->frame_mbs_only_flag;
179 } else {
180 crop_unit_x = 1 + (sps->chroma_format_idc < 3);
181 crop_unit_y = (1 + (sps->chroma_format_idc < 2)) *
182 (2 - sps->frame_mbs_only_flag);
183 }
184#define CROP(border, unit) do { \
185 if (ctx->crop_ ## border >= 0) { \
186 if (ctx->crop_ ## border % unit != 0) { \
187 av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
188 "must be a multiple of %d.\n", #border, unit); \
189 return AVERROR(EINVAL); \
190 } \
191 sps->frame_crop_ ## border ## _offset = \
192 ctx->crop_ ## border / unit; \
193 sps->frame_cropping_flag = 1; \
194 } \
195 } while (0)
196 CROP(left, crop_unit_x);
197 CROP(right, crop_unit_x);
198 CROP(top, crop_unit_y);
199 CROP(bottom, crop_unit_y);
200#undef CROP
201
202 if (need_vui)
203 sps->vui_parameters_present_flag = 1;
204
205 return 0;
206}
207
208static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
209{
210 H264MetadataContext *ctx = bsf->priv_data;
211 AVPacket *in = NULL;
212 CodedBitstreamFragment *au = &ctx->access_unit;
213 int err, i, j, has_sps;
9e93001b
MT
214
215 err = ff_bsf_get_packet(bsf, &in);
216 if (err < 0)
217 goto fail;
218
26513529 219 err = ff_cbs_read_packet(ctx->cbc, au, in);
9e93001b
MT
220 if (err < 0) {
221 av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
222 goto fail;
223 }
224
225 if (au->nb_units == 0) {
226 av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
227 err = AVERROR_INVALIDDATA;
228 goto fail;
229 }
230
231 // If an AUD is present, it must be the first NAL unit.
232 if (au->units[0].type == H264_NAL_AUD) {
233 if (ctx->aud == REMOVE)
26513529 234 ff_cbs_delete_unit(ctx->cbc, au, 0);
9e93001b
MT
235 } else {
236 if (ctx->aud == INSERT) {
237 static const int primary_pic_type_table[] = {
238 0x084, // 2, 7
239 0x0a5, // 0, 2, 5, 7
240 0x0e7, // 0, 1, 2, 5, 6, 7
241 0x210, // 4, 9
242 0x318, // 3, 4, 8, 9
243 0x294, // 2, 4, 7, 9
244 0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
245 0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
246 };
247 int primary_pic_type_mask = 0xff;
248 H264RawAUD *aud = &ctx->aud_nal;
249
250 for (i = 0; i < au->nb_units; i++) {
251 if (au->units[i].type == H264_NAL_SLICE ||
252 au->units[i].type == H264_NAL_IDR_SLICE) {
253 H264RawSlice *slice = au->units[i].content;
254 for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
255 if (!(primary_pic_type_table[j] &
256 (1 << slice->header.slice_type)))
257 primary_pic_type_mask &= ~(1 << j);
258 }
259 }
260 }
261 for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
262 if (primary_pic_type_mask & (1 << j))
263 break;
264 if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
265 av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
266 "invalid slice types?\n");
267 err = AVERROR_INVALIDDATA;
268 goto fail;
269 }
270
271 aud->nal_unit_header.nal_unit_type = H264_NAL_AUD;
272 aud->primary_pic_type = j;
273
26513529 274 err = ff_cbs_insert_unit_content(ctx->cbc, au,
ce5870a3 275 0, H264_NAL_AUD, aud, NULL);
9e93001b
MT
276 if (err < 0) {
277 av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
278 goto fail;
279 }
280 }
281 }
282
283 has_sps = 0;
284 for (i = 0; i < au->nb_units; i++) {
285 if (au->units[i].type == H264_NAL_SPS) {
286 err = h264_metadata_update_sps(bsf, au->units[i].content);
287 if (err < 0)
288 goto fail;
289 has_sps = 1;
290 }
291 }
292
78fa0b90
MT
293 // Only insert the SEI in access units containing SPSs, and also
294 // unconditionally in the first access unit we ever see.
295 if (ctx->sei_user_data && (has_sps || !ctx->sei_first_au)) {
69062d0f
MT
296 H264RawSEIPayload payload = {
297 .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
298 };
299 H264RawSEIUserDataUnregistered *udu =
300 &payload.payload.user_data_unregistered;
9e93001b 301
78fa0b90
MT
302 ctx->sei_first_au = 1;
303
9e93001b
MT
304 for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
305 int c, v;
306 c = ctx->sei_user_data[i];
307 if (c == '-') {
308 continue;
309 } else if (av_isxdigit(c)) {
310 c = av_tolower(c);
311 v = (c <= '9' ? c - '0' : c - 'a' + 10);
312 } else {
313 goto invalid_user_data;
314 }
315 if (i & 1)
316 udu->uuid_iso_iec_11578[j / 2] |= v;
317 else
318 udu->uuid_iso_iec_11578[j / 2] = v << 4;
319 ++j;
320 }
321 if (j == 32 && ctx->sei_user_data[i] == '+') {
69062d0f
MT
322 size_t len = strlen(ctx->sei_user_data + i + 1);
323
324 udu->data_ref = av_buffer_alloc(len + 1);
325 if (!udu->data_ref) {
9e93001b 326 err = AVERROR(ENOMEM);
69062d0f 327 goto fail;
9e93001b
MT
328 }
329
69062d0f
MT
330 udu->data = udu->data_ref->data;
331 udu->data_length = len + 1;
332 memcpy(udu->data, ctx->sei_user_data + i + 1, len + 1);
9e93001b 333
69062d0f 334 payload.payload_size = 16 + udu->data_length;
9e93001b 335
69062d0f
MT
336 err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
337 if (err < 0) {
338 av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
339 "message to access unit.\n");
340 goto fail;
c42b62d1
MT
341 }
342
9e93001b
MT
343 } else {
344 invalid_user_data:
345 av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
346 "must be \"UUID+string\".\n");
347 err = AVERROR(EINVAL);
9e93001b 348 }
9e93001b
MT
349 }
350
6d5a6dde
MT
351 if (ctx->delete_filler) {
352 for (i = 0; i < au->nb_units; i++) {
353 if (au->units[i].type == H264_NAL_FILLER_DATA) {
354 // Filler NAL units.
355 err = ff_cbs_delete_unit(ctx->cbc, au, i);
356 if (err < 0) {
357 av_log(bsf, AV_LOG_ERROR, "Failed to delete "
358 "filler NAL.\n");
359 goto fail;
360 }
361 --i;
362 continue;
363 }
364
365 if (au->units[i].type == H264_NAL_SEI) {
366 // Filler SEI messages.
367 H264RawSEI *sei = au->units[i].content;
368
369 for (j = 0; j < sei->payload_count; j++) {
370 if (sei->payload[j].payload_type ==
371 H264_SEI_TYPE_FILLER_PAYLOAD) {
372 err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
373 &au->units[i], j);
374 if (err < 0) {
375 av_log(bsf, AV_LOG_ERROR, "Failed to delete "
376 "filler SEI message.\n");
377 goto fail;
378 }
379 // Renumbering might have happened, start again at
380 // the same NAL unit position.
381 --i;
382 break;
383 }
384 }
385 }
386 }
387 }
388
26513529 389 err = ff_cbs_write_packet(ctx->cbc, out, au);
9e93001b
MT
390 if (err < 0) {
391 av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
392 goto fail;
393 }
394
395 err = av_packet_copy_props(out, in);
396 if (err < 0)
397 goto fail;
398
399 err = 0;
400fail:
26513529 401 ff_cbs_fragment_uninit(ctx->cbc, au);
9e93001b
MT
402
403 av_packet_free(&in);
404
405 return err;
406}
407
408static int h264_metadata_init(AVBSFContext *bsf)
409{
410 H264MetadataContext *ctx = bsf->priv_data;
411 CodedBitstreamFragment *au = &ctx->access_unit;
412 int err, i;
413
414 err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_H264, bsf);
415 if (err < 0)
416 return err;
417
418 if (bsf->par_in->extradata) {
26513529 419 err = ff_cbs_read_extradata(ctx->cbc, au, bsf->par_in);
9e93001b
MT
420 if (err < 0) {
421 av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
422 goto fail;
423 }
424
425 for (i = 0; i < au->nb_units; i++) {
426 if (au->units[i].type == H264_NAL_SPS) {
427 err = h264_metadata_update_sps(bsf, au->units[i].content);
428 if (err < 0)
429 goto fail;
430 }
431 }
432
26513529 433 err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, au);
9e93001b
MT
434 if (err < 0) {
435 av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
436 goto fail;
437 }
438 }
439
440 err = 0;
441fail:
26513529 442 ff_cbs_fragment_uninit(ctx->cbc, au);
9e93001b
MT
443 return err;
444}
445
446static void h264_metadata_close(AVBSFContext *bsf)
447{
448 H264MetadataContext *ctx = bsf->priv_data;
449 ff_cbs_close(&ctx->cbc);
450}
451
452#define OFFSET(x) offsetof(H264MetadataContext, x)
453static const AVOption h264_metadata_options[] = {
454 { "aud", "Access Unit Delimiter NAL units",
455 OFFSET(aud), AV_OPT_TYPE_INT,
456 { .i64 = PASS }, PASS, REMOVE, 0, "aud" },
457 { "pass", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS }, .unit = "aud" },
458 { "insert", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT }, .unit = "aud" },
459 { "remove", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE }, .unit = "aud" },
460
461 { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
462 OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
463 { .i64 = 0 }, 0, 65535 },
464
465 { "video_format", "Set video format (table E-2)",
466 OFFSET(video_format), AV_OPT_TYPE_INT,
467 { .i64 = -1 }, -1, 7 },
468 { "video_full_range_flag", "Set video full range flag",
469 OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
470 { .i64 = -1 }, -1, 1 },
471 { "colour_primaries", "Set colour primaries (table E-3)",
472 OFFSET(colour_primaries), AV_OPT_TYPE_INT,
473 { .i64 = -1 }, -1, 255 },
474 { "transfer_characteristics", "Set transfer characteristics (table E-4)",
475 OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
476 { .i64 = -1 }, -1, 255 },
477 { "matrix_coefficients", "Set matrix coefficients (table E-5)",
478 OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
479 { .i64 = -1 }, -1, 255 },
480
481 { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
482 OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
483 { .i64 = -1 }, -1, 6 },
484
485 { "tick_rate", "Set VUI tick rate (num_units_in_tick / time_scale)",
486 OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
487 { .i64 = 0 }, 0, UINT_MAX },
488 { "fixed_frame_rate_flag", "Set VUI fixed frame rate flag",
489 OFFSET(fixed_frame_rate_flag), AV_OPT_TYPE_INT,
490 { .i64 = -1 }, -1, 1 },
491
492 { "crop_left", "Set left border crop offset",
493 OFFSET(crop_left), AV_OPT_TYPE_INT,
494 { .i64 = -1 }, -1, H264_MAX_WIDTH },
495 { "crop_right", "Set right border crop offset",
496 OFFSET(crop_right), AV_OPT_TYPE_INT,
497 { .i64 = -1 }, -1, H264_MAX_WIDTH },
498 { "crop_top", "Set top border crop offset",
499 OFFSET(crop_top), AV_OPT_TYPE_INT,
500 { .i64 = -1 }, -1, H264_MAX_HEIGHT },
501 { "crop_bottom", "Set bottom border crop offset",
502 OFFSET(crop_bottom), AV_OPT_TYPE_INT,
503 { .i64 = -1 }, -1, H264_MAX_HEIGHT },
504
505 { "sei_user_data", "Insert SEI user data (UUID+string)",
506 OFFSET(sei_user_data), AV_OPT_TYPE_STRING, { .str = NULL } },
507
6d5a6dde
MT
508 { "delete_filler", "Delete all filler (both NAL and SEI)",
509 OFFSET(delete_filler), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1 },
510
9e93001b
MT
511 { NULL }
512};
513
514static const AVClass h264_metadata_class = {
515 .class_name = "h264_metadata_bsf",
516 .item_name = av_default_item_name,
517 .option = h264_metadata_options,
518 .version = LIBAVCODEC_VERSION_MAJOR,
519};
520
521static const enum AVCodecID h264_metadata_codec_ids[] = {
522 AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
523};
524
525const AVBitStreamFilter ff_h264_metadata_bsf = {
526 .name = "h264_metadata",
527 .priv_data_size = sizeof(H264MetadataContext),
528 .priv_class = &h264_metadata_class,
529 .init = &h264_metadata_init,
530 .close = &h264_metadata_close,
531 .filter = &h264_metadata_filter,
532 .codec_ids = h264_metadata_codec_ids,
533};