2 * Matroska file demuxer (no muxer yet)
3 * Copyright (c) 2003-2004 The ffmpeg Project
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Matroska file demuxer
23 * by Ronald Bultje <rbultje@ronald.bitfreak.net>
24 * with a little help from Moritz Bunkus <moritz@bunkus.org>
25 * Specs available on the matroska project page:
26 * http://www.matroska.org/.
30 /* For codec_get_bmp_id and codec_get_wav_id. */
33 /* EBML version supported */
34 #define EBML_VERSION 1
36 /* top-level master-IDs */
37 #define EBML_ID_HEADER 0x1A45DFA3
39 /* IDs in the HEADER master */
40 #define EBML_ID_EBMLVERSION 0x4286
41 #define EBML_ID_EBMLREADVERSION 0x42F7
42 #define EBML_ID_EBMLMAXIDLENGTH 0x42F2
43 #define EBML_ID_EBMLMAXSIZELENGTH 0x42F3
44 #define EBML_ID_DOCTYPE 0x4282
45 #define EBML_ID_DOCTYPEVERSION 0x4287
46 #define EBML_ID_DOCTYPEREADVERSION 0x4285
48 /* general EBML types */
49 #define EBML_ID_VOID 0xEC
52 * Matroska element IDs. max. 32-bit.
55 /* toplevel segment */
56 #define MATROSKA_ID_SEGMENT 0x18538067
58 /* matroska top-level master IDs */
59 #define MATROSKA_ID_INFO 0x1549A966
60 #define MATROSKA_ID_TRACKS 0x1654AE6B
61 #define MATROSKA_ID_CUES 0x1C53BB6B
62 #define MATROSKA_ID_TAGS 0x1254C367
63 #define MATROSKA_ID_SEEKHEAD 0x114D9B74
64 #define MATROSKA_ID_CLUSTER 0x1F43B675
66 /* IDs in the info master */
67 #define MATROSKA_ID_TIMECODESCALE 0x2AD7B1
68 #define MATROSKA_ID_DURATION 0x4489
69 #define MATROSKA_ID_WRITINGAPP 0x5741
70 #define MATROSKA_ID_MUXINGAPP 0x4D80
71 #define MATROSKA_ID_DATEUTC 0x4461
73 /* ID in the tracks master */
74 #define MATROSKA_ID_TRACKENTRY 0xAE
76 /* IDs in the trackentry master */
77 #define MATROSKA_ID_TRACKNUMBER 0xD7
78 #define MATROSKA_ID_TRACKUID 0x73C5
79 #define MATROSKA_ID_TRACKTYPE 0x83
80 #define MATROSKA_ID_TRACKAUDIO 0xE1
81 #define MATROSKA_ID_TRACKVIDEO 0xE0
82 #define MATROSKA_ID_CODECID 0x86
83 #define MATROSKA_ID_CODECPRIVATE 0x63A2
84 #define MATROSKA_ID_CODECNAME 0x258688
85 #define MATROSKA_ID_CODECINFOURL 0x3B4040
86 #define MATROSKA_ID_CODECDOWNLOADURL 0x26B240
87 #define MATROSKA_ID_TRACKNAME 0x536E
88 #define MATROSKA_ID_TRACKLANGUAGE 0x22B59C
89 #define MATROSKA_ID_TRACKFLAGENABLED 0xB9
90 #define MATROSKA_ID_TRACKFLAGDEFAULT 0x88
91 #define MATROSKA_ID_TRACKFLAGLACING 0x9C
92 #define MATROSKA_ID_TRACKMINCACHE 0x6DE7
93 #define MATROSKA_ID_TRACKMAXCACHE 0x6DF8
94 #define MATROSKA_ID_TRACKDEFAULTDURATION 0x23E383
96 /* IDs in the trackvideo master */
97 #define MATROSKA_ID_VIDEOFRAMERATE 0x2383E3
98 #define MATROSKA_ID_VIDEODISPLAYWIDTH 0x54B0
99 #define MATROSKA_ID_VIDEODISPLAYHEIGHT 0x54BA
100 #define MATROSKA_ID_VIDEOPIXELWIDTH 0xB0
101 #define MATROSKA_ID_VIDEOPIXELHEIGHT 0xBA
102 #define MATROSKA_ID_VIDEOFLAGINTERLACED 0x9A
103 #define MATROSKA_ID_VIDEOSTEREOMODE 0x53B9
104 #define MATROSKA_ID_VIDEOASPECTRATIO 0x54B3
105 #define MATROSKA_ID_VIDEOCOLOURSPACE 0x2EB524
107 /* IDs in the trackaudio master */
108 #define MATROSKA_ID_AUDIOSAMPLINGFREQ 0xB5
109 #define MATROSKA_ID_AUDIOBITDEPTH 0x6264
110 #define MATROSKA_ID_AUDIOCHANNELS 0x9F
112 /* ID in the cues master */
113 #define MATROSKA_ID_POINTENTRY 0xBB
115 /* IDs in the pointentry master */
116 #define MATROSKA_ID_CUETIME 0xB3
117 #define MATROSKA_ID_CUETRACKPOSITION 0xB7
119 /* IDs in the cuetrackposition master */
120 #define MATROSKA_ID_CUETRACK 0xF7
121 #define MATROSKA_ID_CUECLUSTERPOSITION 0xF1
123 /* IDs in the tags master */
126 /* IDs in the seekhead master */
127 #define MATROSKA_ID_SEEKENTRY 0x4DBB
129 /* IDs in the seekpoint master */
130 #define MATROSKA_ID_SEEKID 0x53AB
131 #define MATROSKA_ID_SEEKPOSITION 0x53AC
133 /* IDs in the cluster master */
134 #define MATROSKA_ID_CLUSTERTIMECODE 0xE7
135 #define MATROSKA_ID_BLOCKGROUP 0xA0
137 /* IDs in the blockgroup master */
138 #define MATROSKA_ID_BLOCK 0xA1
139 #define MATROSKA_ID_BLOCKDURATION 0x9B
140 #define MATROSKA_ID_BLOCKREFERENCE 0xFB
143 MATROSKA_TRACK_TYPE_VIDEO
= 0x1,
144 MATROSKA_TRACK_TYPE_AUDIO
= 0x2,
145 MATROSKA_TRACK_TYPE_COMPLEX
= 0x3,
146 MATROSKA_TRACK_TYPE_LOGO
= 0x10,
147 MATROSKA_TRACK_TYPE_SUBTITLE
= 0x11,
148 MATROSKA_TRACK_TYPE_CONTROL
= 0x20,
152 MATROSKA_EYE_MODE_MONO
= 0x0,
153 MATROSKA_EYE_MODE_RIGHT
= 0x1,
154 MATROSKA_EYE_MODE_LEFT
= 0x2,
155 MATROSKA_EYE_MODE_BOTH
= 0x3,
159 MATROSKA_ASPECT_RATIO_MODE_FREE
= 0x0,
160 MATROSKA_ASPECT_RATIO_MODE_KEEP
= 0x1,
161 MATROSKA_ASPECT_RATIO_MODE_FIXED
= 0x2,
162 } MatroskaAspectRatioMode
;
165 * These aren't in any way "matroska-form" things,
166 * it's just something I use in the muxer/demuxer.
170 MATROSKA_TRACK_ENABLED
= (1<<0),
171 MATROSKA_TRACK_DEFAULT
= (1<<1),
172 MATROSKA_TRACK_LACING
= (1<<2),
173 MATROSKA_TRACK_SHIFT
= (1<<16)
174 } MatroskaTrackFlags
;
177 MATROSKA_VIDEOTRACK_INTERLACED
= (MATROSKA_TRACK_SHIFT
<<0)
178 } MatroskaVideoTrackFlags
;
181 * Matroska Codec IDs. Strings.
184 #define MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC "V_MS/VFW/FOURCC"
185 #define MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED "V_UNCOMPRESSED"
186 #define MATROSKA_CODEC_ID_VIDEO_MPEG4_SP "V_MPEG4/ISO/SP"
187 #define MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP "V_MPEG4/ISO/ASP"
188 #define MATROSKA_CODEC_ID_VIDEO_MPEG4_AP "V_MPEG4/ISO/AP"
189 #define MATROSKA_CODEC_ID_VIDEO_MPEG4_AVC "V_MPEG4/ISO/AVC"
190 #define MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3 "V_MPEG4/MS/V3"
191 #define MATROSKA_CODEC_ID_VIDEO_MPEG1 "V_MPEG1"
192 #define MATROSKA_CODEC_ID_VIDEO_MPEG2 "V_MPEG2"
193 #define MATROSKA_CODEC_ID_VIDEO_MJPEG "V_MJPEG"
194 /* TODO: Real/Quicktime */
196 #define MATROSKA_CODEC_ID_AUDIO_ACM "A_MS/ACM"
197 #define MATROSKA_CODEC_ID_AUDIO_MPEG1_L1 "A_MPEG/L1"
198 #define MATROSKA_CODEC_ID_AUDIO_MPEG1_L2 "A_MPEG/L2"
199 #define MATROSKA_CODEC_ID_AUDIO_MPEG1_L3 "A_MPEG/L3"
200 #define MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE "A_PCM/INT/BIG"
201 #define MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE "A_PCM/INT/LIT"
202 #define MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT "A_PCM/FLOAT/IEEE"
203 #define MATROSKA_CODEC_ID_AUDIO_AC3 "A_AC3"
204 #define MATROSKA_CODEC_ID_AUDIO_DTS "A_DTS"
205 #define MATROSKA_CODEC_ID_AUDIO_VORBIS "A_VORBIS"
206 #define MATROSKA_CODEC_ID_AUDIO_ACM "A_MS/ACM"
207 #define MATROSKA_CODEC_ID_AUDIO_MPEG2 "A_AAC/MPEG2/"
208 #define MATROSKA_CODEC_ID_AUDIO_MPEG4 "A_AAC/MPEG4/"
209 /* TODO: AC3-9/10 (?), Real, Musepack, Quicktime */
211 /* max. depth in the EBML tree structure */
212 #define EBML_MAX_DEPTH 16
214 typedef struct Track
{
215 MatroskaTrackType type
;
217 /* Unique track number and track ID. stream_index is the index that
218 * the calling app uses for this track. */
229 unsigned char *codec_priv
;
232 int64_t default_duration
;
233 MatroskaTrackFlags flags
;
236 typedef struct MatroskaVideoTrack
{
246 MatroskaAspectRatioMode ar_mode
;
247 MatroskaEyeMode eye_mode
;
250 } MatroskaVideoTrack
;
252 typedef struct MatroskaAudioTrack
{
259 } MatroskaAudioTrack
;
261 typedef struct MatroskaSubtitleTrack
{
265 } MatroskaSubtitleTrack
;
267 typedef struct MatroskaLevel
{
268 uint64_t start
, length
;
271 typedef struct MatroskaDemuxIndex
{
272 uint64_t pos
; /* of the corresponding *cluster*! */
273 uint16_t track
; /* reference to 'num' */
274 uint64_t time
; /* in nanoseconds */
275 } MatroskaDemuxIndex
;
277 typedef struct MatroskaDemuxContext
{
278 AVFormatContext
*ctx
;
282 MatroskaLevel levels
[EBML_MAX_DEPTH
];
290 /* timescale in the file */
293 /* length, position (time, ns) */
297 /* num_streams is the number of streams that av_new_stream() was called
298 * for ( = that are available to the calling program). */
299 int num_tracks
, num_streams
;
300 MatroskaTrack
*tracks
[MAX_STREAMS
];
302 /* cache for ID peeking */
305 /* byte position of the segment inside the stream */
306 offset_t segment_start
;
308 /* The packet queue. */
312 /* have we already parsed metadata/cues/clusters? */
317 /* The index for seeking. */
319 MatroskaDemuxIndex
*index
;
320 } MatroskaDemuxContext
;
323 * The first few functions handle EBML file parsing. The rest
324 * is the document interpretation. Matroska really just is a
329 * Return: the amount of levels in the hierarchy that the
330 * current element lies higher than the previous one.
331 * The opposite isn't done - that's auto-done using master
336 ebml_read_element_level_up (MatroskaDemuxContext
*matroska
)
338 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
339 offset_t pos
= url_ftell(pb
);
342 while (matroska
->num_levels
> 0) {
343 MatroskaLevel
*level
= &matroska
->levels
[matroska
->num_levels
- 1];
345 if (pos
>= level
->start
+ level
->length
) {
346 matroska
->num_levels
--;
357 * Read: an "EBML number", which is defined as a variable-length
358 * array of bytes. The first byte indicates the length by giving a
359 * number of 0-bits followed by a one. The position of the first
360 * "one" bit inside the first byte indicates the length of this
362 * Returns: num. of bytes read. < 0 on error.
366 ebml_read_num (MatroskaDemuxContext
*matroska
,
370 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
371 int len_mask
= 0x80, read
= 1, n
= 1;
374 /* the first byte tells us the length in bytes - get_byte() can normally
375 * return 0, but since that's not a valid first ebmlID byte, we can
376 * use it safely here to catch EOS. */
377 if (!(total
= get_byte(pb
))) {
378 /* we might encounter EOS here */
380 offset_t pos
= url_ftell(pb
);
381 av_log(matroska
->ctx
, AV_LOG_ERROR
,
382 "Read error at pos. %llu (0x%llx)\n",
385 return AVERROR_IO
; /* EOS or actual I/O error */
388 /* get the length of the EBML number */
389 while (read
<= max_size
&& !(total
& len_mask
)) {
393 if (read
> max_size
) {
394 offset_t pos
= url_ftell(pb
) - 1;
395 av_log(matroska
->ctx
, AV_LOG_ERROR
,
396 "Invalid EBML number size tag 0x%02x at pos %llu (0x%llx)\n",
397 (uint8_t) total
, pos
, pos
);
398 return AVERROR_INVALIDDATA
;
401 /* read out length */
404 total
= (total
<< 8) | get_byte(pb
);
412 * Read: the element content data ID.
413 * Return: the number of bytes read or < 0 on error.
417 ebml_read_element_id (MatroskaDemuxContext
*matroska
,
424 /* if we re-call this, use our cached ID */
425 if (matroska
->peek_id
!= 0) {
428 *id
= matroska
->peek_id
;
432 /* read out the "EBML number", include tag in ID */
433 if ((read
= ebml_read_num(matroska
, 4, &total
)) < 0)
435 *id
= matroska
->peek_id
= total
| (1 << (read
* 7));
439 *level_up
= ebml_read_element_level_up(matroska
);
445 * Read: element content length.
446 * Return: the number of bytes read or < 0 on error.
450 ebml_read_element_length (MatroskaDemuxContext
*matroska
,
453 /* clear cache since we're now beyond that data point */
454 matroska
->peek_id
= 0;
456 /* read out the "EBML number" that holds the element length */
457 return ebml_read_num(matroska
, 8, length
);
461 * Return: the ID of the next element, or 0 on error.
462 * Level_up contains the amount of levels that this
463 * next element lies higher than the previous one.
467 ebml_peek_id (MatroskaDemuxContext
*matroska
,
472 assert(level_up
!= NULL
);
474 if (ebml_read_element_id(matroska
, &id
, level_up
) < 0)
481 * Seek to a given offset.
482 * 0 is success, -1 is failure.
486 ebml_read_seek (MatroskaDemuxContext
*matroska
,
489 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
491 /* clear ID cache, if any */
492 matroska
->peek_id
= 0;
494 return (url_fseek(pb
, offset
, SEEK_SET
) == offset
) ?
0 : -1;
498 * Skip the next element.
499 * 0 is success, -1 is failure.
503 ebml_read_skip (MatroskaDemuxContext
*matroska
)
505 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
510 if ((res
= ebml_read_element_id(matroska
, &id
, NULL
)) < 0 ||
511 (res
= ebml_read_element_length(matroska
, &length
)) < 0)
514 url_fskip(pb
, length
);
520 * Read the next element as an unsigned int.
521 * 0 is success, < 0 is failure.
525 ebml_read_uint (MatroskaDemuxContext
*matroska
,
529 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
530 int n
= 0, size
, res
;
533 if ((res
= ebml_read_element_id(matroska
, id
, NULL
)) < 0 ||
534 (res
= ebml_read_element_length(matroska
, &rlength
)) < 0)
537 if (size
< 1 || size
> 8) {
538 offset_t pos
= url_ftell(pb
);
539 av_log(matroska
->ctx
, AV_LOG_ERROR
,
540 "Invalid uint element size %d at position %lld (0x%llx)\n",
542 return AVERROR_INVALIDDATA
;
545 /* big-endian ordering; build up number */
548 *num
= (*num
<< 8) | get_byte(pb
);
554 * Read the next element as a signed int.
555 * 0 is success, < 0 is failure.
559 ebml_read_sint (MatroskaDemuxContext
*matroska
,
563 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
564 int size
, n
= 1, negative
= 0, res
;
567 if ((res
= ebml_read_element_id(matroska
, id
, NULL
)) < 0 ||
568 (res
= ebml_read_element_length(matroska
, &rlength
)) < 0)
571 if (size
< 1 || size
> 8) {
572 offset_t pos
= url_ftell(pb
);
573 av_log(matroska
->ctx
, AV_LOG_ERROR
,
574 "Invalid sint element size %d at position %lld (0x%llx)\n",
576 return AVERROR_INVALIDDATA
;
578 if ((*num
= get_byte(pb
)) & 0x80) {
584 *num
= (*num
<< 8) | get_byte(pb
);
588 *num
= *num
- (1LL << ((8 * size
) - 1));
594 * Read the next element as a float.
595 * 0 is success, < 0 is failure.
599 ebml_read_float (MatroskaDemuxContext
*matroska
,
603 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
607 if ((res
= ebml_read_element_id(matroska
, id
, NULL
)) < 0 ||
608 (res
= ebml_read_element_length(matroska
, &rlength
)) < 0)
612 if (size
!= 4 && size
!= 8 && size
!= 10) {
613 offset_t pos
= url_ftell(pb
);
614 av_log(matroska
->ctx
, AV_LOG_ERROR
,
615 "Invalid float element size %d at position %llu (0x%llx)\n",
617 return AVERROR_INVALIDDATA
;
620 av_log(matroska
->ctx
, AV_LOG_ERROR
,
621 "FIXME! 10-byte floats unimplemented\n");
622 return AVERROR_UNKNOWN
;
629 #ifdef WORDS_BIGENDIAN
630 ((uint8_t *) &f
)[3 - size
] = get_byte(pb
);
632 ((uint8_t *) &f
)[size
] = get_byte(pb
);
640 #ifdef WORDS_BIGENDIAN
641 ((uint8_t *) &d
)[7 - size
] = get_byte(pb
);
643 ((uint8_t *) &d
)[size
] = get_byte(pb
);
653 * Read the next element as an ASCII string.
654 * 0 is success, < 0 is failure.
658 ebml_read_ascii (MatroskaDemuxContext
*matroska
,
662 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
666 if ((res
= ebml_read_element_id(matroska
, id
, NULL
)) < 0 ||
667 (res
= ebml_read_element_length(matroska
, &rlength
)) < 0)
671 /* ebml strings are usually not 0-terminated, so we allocate one
672 * byte more, read the string and NULL-terminate it ourselves. */
673 if (size
< 0 || !(*str
= av_malloc(size
+ 1))) {
674 av_log(matroska
->ctx
, AV_LOG_ERROR
, "Memory allocation failed\n");
675 return AVERROR_NOMEM
;
677 if (get_buffer(pb
, (uint8_t *) *str
, size
) != size
) {
678 offset_t pos
= url_ftell(pb
);
679 av_log(matroska
->ctx
, AV_LOG_ERROR
,
680 "Read error at pos. %llu (0x%llx)\n", pos
, pos
);
689 * Read the next element as a UTF-8 string.
690 * 0 is success, < 0 is failure.
694 ebml_read_utf8 (MatroskaDemuxContext
*matroska
,
698 return ebml_read_ascii(matroska
, id
, str
);
702 * Read the next element as a date (nanoseconds since 1/1/2000).
703 * 0 is success, < 0 is failure.
707 ebml_read_date (MatroskaDemuxContext
*matroska
,
711 return ebml_read_sint(matroska
, id
, date
);
715 * Read the next element, but only the header. The contents
716 * are supposed to be sub-elements which can be read separately.
717 * 0 is success, < 0 is failure.
721 ebml_read_master (MatroskaDemuxContext
*matroska
,
724 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
726 MatroskaLevel
*level
;
729 if ((res
= ebml_read_element_id(matroska
, id
, NULL
)) < 0 ||
730 (res
= ebml_read_element_length(matroska
, &length
)) < 0)
733 /* protect... (Heaven forbids that the '>' is true) */
734 if (matroska
->num_levels
>= EBML_MAX_DEPTH
) {
735 av_log(matroska
->ctx
, AV_LOG_ERROR
,
736 "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH
);
737 return AVERROR_NOTSUPP
;
741 level
= &matroska
->levels
[matroska
->num_levels
++];
742 level
->start
= url_ftell(pb
);
743 level
->length
= length
;
749 * Read the next element as binary data.
750 * 0 is success, < 0 is failure.
754 ebml_read_binary (MatroskaDemuxContext
*matroska
,
759 ByteIOContext
*pb
= &matroska
->ctx
->pb
;
763 if ((res
= ebml_read_element_id(matroska
, id
, NULL
)) < 0 ||
764 (res
= ebml_read_element_length(matroska
, &rlength
)) < 0)
768 if (!(*binary
= av_malloc(*size
))) {
769 av_log(matroska
->ctx
, AV_LOG_ERROR
,
770 "Memory allocation error\n");
771 return AVERROR_NOMEM
;
774 if (get_buffer(pb
, *binary
, *size
) != *size
) {
775 offset_t pos
= url_ftell(pb
);
776 av_log(matroska
->ctx
, AV_LOG_ERROR
,
777 "Read error at pos. %llu (0x%llx)\n", pos
, pos
);
785 * Read signed/unsigned "EBML" numbers.
786 * Return: number of bytes processed, < 0 on error.
787 * XXX: use ebml_read_num().
791 matroska_ebmlnum_uint (uint8_t *data
,
795 int len_mask
= 0x80, read
= 1, n
= 1, num_ffs
= 0;
799 return AVERROR_INVALIDDATA
;
802 while (read
<= 8 && !(total
& len_mask
)) {
807 return AVERROR_INVALIDDATA
;
809 if ((total
&= (len_mask
- 1)) == len_mask
- 1)
812 return AVERROR_INVALIDDATA
;
816 total
= (total
<< 8) | data
[n
];
821 return AVERROR_INVALIDDATA
;
832 * Same as above, but signed.
836 matroska_ebmlnum_sint (uint8_t *data
,
843 /* read as unsigned number first */
844 if ((res
= matroska_ebmlnum_uint(data
, size
, &unum
)) < 0)
847 /* make signed (weird way) */
848 if (unum
== (uint64_t)-1)
851 *num
= unum
- ((1LL << ((7 * res
) - 1)) - 1);
857 * Read an EBML header.
858 * 0 is success, < 0 is failure.
862 ebml_read_header (MatroskaDemuxContext
*matroska
,
867 int level_up
, res
= 0;
875 if (!(id
= ebml_peek_id(matroska
, &level_up
)) ||
876 level_up
!= 0 || id
!= EBML_ID_HEADER
) {
877 av_log(matroska
->ctx
, AV_LOG_ERROR
,
878 "This is not an EBML file (id=0x%x/0x%x)\n", id
, EBML_ID_HEADER
);
879 return AVERROR_INVALIDDATA
;
881 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
885 if (!(id
= ebml_peek_id(matroska
, &level_up
)))
893 /* is our read version uptodate? */
894 case EBML_ID_EBMLREADVERSION
: {
897 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
899 if (num
> EBML_VERSION
) {
900 av_log(matroska
->ctx
, AV_LOG_ERROR
,
901 "EBML version %llu (> %d) is not supported\n",
903 return AVERROR_INVALIDDATA
;
908 /* we only handle 8 byte lengths at max */
909 case EBML_ID_EBMLMAXSIZELENGTH
: {
912 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
914 if (num
> sizeof(uint64_t)) {
/* sizeof yields a size_t; cast both variadic arguments so that they match
 * the %llu / %d conversions exactly (a mismatched argument type is
 * undefined behaviour and prints garbage where size_t is wider than int). */
915 av_log(matroska->ctx, AV_LOG_ERROR,
916 "Integers of size %llu (> %d) not supported\n",
917 (unsigned long long) num, (int) sizeof(uint64_t));
918 return AVERROR_INVALIDDATA
;
923 /* we handle 4 byte IDs at max */
924 case EBML_ID_EBMLMAXIDLENGTH
: {
927 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
929 if (num
> sizeof(uint32_t)) {
/* sizeof yields a size_t; cast both variadic arguments so that they match
 * the %llu / %u conversions exactly (a mismatched argument type is
 * undefined behaviour and prints garbage where size_t is wider than int). */
930 av_log(matroska->ctx, AV_LOG_ERROR,
931 "IDs of size %llu (> %u) not supported\n",
932 (unsigned long long) num, (unsigned) sizeof(uint32_t));
933 return AVERROR_INVALIDDATA
;
938 case EBML_ID_DOCTYPE
: {
941 if ((res
= ebml_read_ascii(matroska
, &id
, &text
)) < 0)
952 case EBML_ID_DOCTYPEREADVERSION
: {
955 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
/* Report the unrecognised header entry; the message is newline-terminated
 * like the other log messages in this file so that the next log line does
 * not get appended to it. */
963 av_log(matroska->ctx, AV_LOG_INFO,
964 "Unknown data type 0x%x in EBML header\n", id);
968 /* we ignore these two, as they don't tell us anything we
970 case EBML_ID_EBMLVERSION
:
971 case EBML_ID_DOCTYPEVERSION
:
972 res
= ebml_read_skip (matroska
);
981 * Put one packet in an application-supplied AVPacket struct.
982 * Returns 0 on success or -1 on failure.
986 matroska_deliver_packet (MatroskaDemuxContext
*matroska
,
989 if (matroska
->num_packets
> 0) {
990 memcpy(pkt
, matroska
->packets
[0], sizeof(AVPacket
));
991 av_free(matroska
->packets
[0]);
992 if (matroska
->num_packets
> 1) {
993 memmove(&matroska
->packets
[0], &matroska
->packets
[1],
994 (matroska
->num_packets
- 1) * sizeof(AVPacket
*));
996 av_realloc(matroska
->packets
, (matroska
->num_packets
- 1) *
999 av_free(matroska
->packets
);
1000 matroska
->packets
= NULL
;
1002 matroska
->num_packets
--;
1010 * Put a packet into our internal queue. Will be delivered to the
1011 * user/application during the next get_packet() call.
1015 matroska_queue_packet (MatroskaDemuxContext
*matroska
,
1019 av_realloc(matroska
->packets
, (matroska
->num_packets
+ 1) *
1020 sizeof(AVPacket
*));
1021 matroska
->packets
[matroska
->num_packets
] = pkt
;
1022 matroska
->num_packets
++;
1030 matroska_probe (AVProbeData
*p
)
1033 int len_mask
= 0x80, size
= 1, n
= 1;
1034 uint8_t probe_data
[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
1036 if (p
->buf_size
< 5)
1040 if ((p
->buf
[0] << 24 | p
->buf
[1] << 16 |
1041 p
->buf
[2] << 8 | p
->buf
[3]) != EBML_ID_HEADER
)
1044 /* length of header */
1046 while (size
<= 8 && !(total
& len_mask
)) {
1052 total
&= (len_mask
- 1);
1054 total
= (total
<< 8) | p
->buf
[4 + n
++];
1056 /* does the probe data contain the whole header? */
1057 if (p
->buf_size
< 4 + size
+ total
)
1060 /* the header must contain the document type 'matroska'. For now,
1061 * we don't parse the whole header but simply check for the
1062 * availability of that array of characters inside the header.
1063 * Not fully fool-proof, but good enough. */
1064 for (n
= 4 + size
; n
< 4 + size
+ total
- sizeof(probe_data
); n
++)
1065 if (!memcmp (&p
->buf
[n
], probe_data
, sizeof(probe_data
)))
1066 return AVPROBE_SCORE_MAX
;
1072 * From here on, it's all XML-style DTD stuff... Needs no comments.
1076 matroska_parse_info (MatroskaDemuxContext
*matroska
)
1081 av_log(matroska
->ctx
, AV_LOG_DEBUG
, "Parsing info...\n");
1084 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1087 } else if (matroska
->level_up
) {
1088 matroska
->level_up
--;
1093 /* timecode scale */
1094 case MATROSKA_ID_TIMECODESCALE
: {
1096 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1098 matroska
->time_scale
= num
;
1102 case MATROSKA_ID_DURATION
: {
1104 if ((res
= ebml_read_float(matroska
, &id
, &num
)) < 0)
1106 matroska
->duration
= num
* matroska
->time_scale
;
1110 case MATROSKA_ID_WRITINGAPP
: {
1112 if ((res
= ebml_read_utf8(matroska
, &id
, &text
)) < 0)
1114 matroska
->writing_app
= text
;
1118 case MATROSKA_ID_MUXINGAPP
: {
1120 if ((res
= ebml_read_utf8(matroska
, &id
, &text
)) < 0)
1122 matroska
->muxing_app
= text
;
1126 case MATROSKA_ID_DATEUTC
: {
1128 if ((res
= ebml_read_date(matroska
, &id
, &time
)) < 0)
1130 matroska
->created
= time
;
1135 av_log(matroska
->ctx
, AV_LOG_INFO
,
1136 "Unknown entry 0x%x in info header\n", id
);
1140 res
= ebml_read_skip(matroska
);
1144 if (matroska
->level_up
) {
1145 matroska
->level_up
--;
1154 matroska_add_stream (MatroskaDemuxContext
*matroska
)
1158 MatroskaTrack
*track
;
1160 av_log(matroska
->ctx
, AV_LOG_DEBUG
, "parsing track, adding stream..,\n");
1162 /* Allocate a generic track. As soon as we know its type we'll realloc. */
1163 track
= av_mallocz(sizeof(MatroskaTrack
));
1164 matroska
->num_tracks
++;
1166 /* start with the master */
1167 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
1170 /* try reading the trackentry headers */
1172 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1175 } else if (matroska
->level_up
> 0) {
1176 matroska
->level_up
--;
1181 /* track number (unique stream ID) */
1182 case MATROSKA_ID_TRACKNUMBER
: {
1184 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1190 /* track UID (unique identifier) */
1191 case MATROSKA_ID_TRACKUID
: {
1193 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1199 /* track type (video, audio, combined, subtitle, etc.) */
1200 case MATROSKA_ID_TRACKTYPE
: {
1202 if (track
->type
!= 0) {
1203 av_log(matroska
->ctx
, AV_LOG_INFO
,
1204 "More than one tracktype in an entry - skip\n");
1207 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1211 /* ok, so we're actually going to reallocate this thing */
1212 switch (track
->type
) {
1213 case MATROSKA_TRACK_TYPE_VIDEO
:
1214 track
= (MatroskaTrack
*)
1215 av_realloc(track
, sizeof(MatroskaVideoTrack
));
1217 case MATROSKA_TRACK_TYPE_AUDIO
:
1218 track
= (MatroskaTrack
*)
1219 av_realloc(track
, sizeof(MatroskaAudioTrack
));
1220 ((MatroskaAudioTrack
*)track
)->channels
= 1;
1221 ((MatroskaAudioTrack
*)track
)->samplerate
= 8000;
1223 case MATROSKA_TRACK_TYPE_SUBTITLE
:
1224 track
= (MatroskaTrack
*)
1225 av_realloc(track
, sizeof(MatroskaSubtitleTrack
));
1227 case MATROSKA_TRACK_TYPE_COMPLEX
:
1228 case MATROSKA_TRACK_TYPE_LOGO
:
1229 case MATROSKA_TRACK_TYPE_CONTROL
:
1231 av_log(matroska
->ctx
, AV_LOG_INFO
,
1232 "Unknown or unsupported track type 0x%x\n",
1237 matroska
->tracks
[matroska
->num_tracks
- 1] = track
;
1241 /* tracktype specific stuff for video */
1242 case MATROSKA_ID_TRACKVIDEO
: {
1243 MatroskaVideoTrack
*videotrack
;
1244 if (track
->type
!= MATROSKA_TRACK_TYPE_VIDEO
) {
1245 av_log(matroska
->ctx
, AV_LOG_INFO
,
1246 "video data in non-video track - ignoring\n");
1247 res
= AVERROR_INVALIDDATA
;
1249 } else if ((res
= ebml_read_master(matroska
, &id
)) < 0)
1251 videotrack
= (MatroskaVideoTrack
*)track
;
1254 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1257 } else if (matroska
->level_up
> 0) {
1258 matroska
->level_up
--;
1263 /* fixme, this should be one-up, but I get it here */
1264 case MATROSKA_ID_TRACKDEFAULTDURATION
: {
1266 if ((res
= ebml_read_uint (matroska
, &id
,
1269 track
->default_duration
= num
;
1273 /* video framerate */
1274 case MATROSKA_ID_VIDEOFRAMERATE
: {
1276 if ((res
= ebml_read_float(matroska
, &id
,
1279 track
->default_duration
= 1000000000 * (1. / num
);
1283 /* width of the size to display the video at */
1284 case MATROSKA_ID_VIDEODISPLAYWIDTH
: {
1286 if ((res
= ebml_read_uint(matroska
, &id
,
1289 videotrack
->display_width
= num
;
1293 /* height of the size to display the video at */
1294 case MATROSKA_ID_VIDEODISPLAYHEIGHT
: {
1296 if ((res
= ebml_read_uint(matroska
, &id
,
1299 videotrack
->display_height
= num
;
1303 /* width of the video in the file */
1304 case MATROSKA_ID_VIDEOPIXELWIDTH
: {
1306 if ((res
= ebml_read_uint(matroska
, &id
,
1309 videotrack
->pixel_width
= num
;
1313 /* height of the video in the file */
1314 case MATROSKA_ID_VIDEOPIXELHEIGHT
: {
1316 if ((res
= ebml_read_uint(matroska
, &id
,
1319 videotrack
->pixel_height
= num
;
1323 /* whether the video is interlaced */
1324 case MATROSKA_ID_VIDEOFLAGINTERLACED
: {
1326 if ((res
= ebml_read_uint(matroska
, &id
,
1331 MATROSKA_VIDEOTRACK_INTERLACED
;
1334 ~MATROSKA_VIDEOTRACK_INTERLACED
;
1338 /* stereo mode (whether the video has two streams,
1339 * where one is for the left eye and the other for
1340 * the right eye, which creates a 3D-like
1342 case MATROSKA_ID_VIDEOSTEREOMODE
: {
1344 if ((res
= ebml_read_uint(matroska
, &id
,
1347 if (num
!= MATROSKA_EYE_MODE_MONO
&&
1348 num
!= MATROSKA_EYE_MODE_LEFT
&&
1349 num
!= MATROSKA_EYE_MODE_RIGHT
&&
1350 num
!= MATROSKA_EYE_MODE_BOTH
) {
1351 av_log(matroska
->ctx
, AV_LOG_INFO
,
1352 "Ignoring unknown eye mode 0x%x\n",
1356 videotrack
->eye_mode
= num
;
1360 /* aspect ratio behaviour */
1361 case MATROSKA_ID_VIDEOASPECTRATIO
: {
1363 if ((res
= ebml_read_uint(matroska
, &id
,
1366 if (num
!= MATROSKA_ASPECT_RATIO_MODE_FREE
&&
1367 num
!= MATROSKA_ASPECT_RATIO_MODE_KEEP
&&
1368 num
!= MATROSKA_ASPECT_RATIO_MODE_FIXED
) {
1369 av_log(matroska
->ctx
, AV_LOG_INFO
,
1370 "Ignoring unknown aspect ratio 0x%x\n",
1374 videotrack
->ar_mode
= num
;
1378 /* colourspace (only matters for raw video)
1380 case MATROSKA_ID_VIDEOCOLOURSPACE
: {
1382 if ((res
= ebml_read_uint(matroska
, &id
,
1385 videotrack
->fourcc
= num
;
1390 av_log(matroska
->ctx
, AV_LOG_INFO
,
1391 "Unknown video track header entry "
1392 "0x%x - ignoring\n", id
);
1396 res
= ebml_read_skip(matroska
);
1400 if (matroska
->level_up
) {
1401 matroska
->level_up
--;
1408 /* tracktype specific stuff for audio */
1409 case MATROSKA_ID_TRACKAUDIO
: {
1410 MatroskaAudioTrack
*audiotrack
;
1411 if (track
->type
!= MATROSKA_TRACK_TYPE_AUDIO
) {
1412 av_log(matroska
->ctx
, AV_LOG_INFO
,
1413 "audio data in non-audio track - ignoring\n");
1414 res
= AVERROR_INVALIDDATA
;
1416 } else if ((res
= ebml_read_master(matroska
, &id
)) < 0)
1418 audiotrack
= (MatroskaAudioTrack
*)track
;
1421 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1424 } else if (matroska
->level_up
> 0) {
1425 matroska
->level_up
--;
1431 case MATROSKA_ID_AUDIOSAMPLINGFREQ
: {
1433 if ((res
= ebml_read_float(matroska
, &id
,
1436 audiotrack
->samplerate
= num
;
1441 case MATROSKA_ID_AUDIOBITDEPTH
: {
1443 if ((res
= ebml_read_uint(matroska
, &id
,
1446 audiotrack
->bitdepth
= num
;
1451 case MATROSKA_ID_AUDIOCHANNELS
: {
1453 if ((res
= ebml_read_uint(matroska
, &id
,
1456 audiotrack
->channels
= num
;
1461 av_log(matroska
->ctx
, AV_LOG_INFO
,
1462 "Unknown audio track header entry "
1463 "0x%x - ignoring\n", id
);
1467 res
= ebml_read_skip(matroska
);
1471 if (matroska
->level_up
) {
1472 matroska
->level_up
--;
1479 /* codec identifier */
1480 case MATROSKA_ID_CODECID
: {
1482 if ((res
= ebml_read_ascii(matroska
, &id
, &text
)) < 0)
1484 track
->codec_id
= text
;
1488 /* codec private data */
1489 case MATROSKA_ID_CODECPRIVATE
: {
/* Store the return value of ebml_read_binary() in res and then test it.
 * The closing parenthesis used to sit after "< 0", so res received the
 * boolean result of the comparison (0 or 1) and the negative error code
 * was lost. */
1492 if ((res = ebml_read_binary(matroska, &id, &data, &size)) < 0)
1494 track
->codec_priv
= data
;
1495 track
->codec_priv_size
= size
;
1499 /* name of the codec */
1500 case MATROSKA_ID_CODECNAME
: {
1502 if ((res
= ebml_read_utf8(matroska
, &id
, &text
)) < 0)
1504 track
->codec_name
= text
;
1508 /* name of this track */
1509 case MATROSKA_ID_TRACKNAME
: {
1511 if ((res
= ebml_read_utf8(matroska
, &id
, &text
)) < 0)
1517 /* language (matters for audio/subtitles, mostly) */
1518 case MATROSKA_ID_TRACKLANGUAGE
: {
1520 if ((res
= ebml_read_utf8(matroska
, &id
, &text
)) < 0)
1522 track
->language
= text
;
1526 /* whether this is actually used */
1527 case MATROSKA_ID_TRACKFLAGENABLED
: {
1529 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1532 track
->flags
|= MATROSKA_TRACK_ENABLED
;
1534 track
->flags
&= ~MATROSKA_TRACK_ENABLED
;
1538 /* whether it's the default for this track type */
1539 case MATROSKA_ID_TRACKFLAGDEFAULT
: {
1541 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1544 track
->flags
|= MATROSKA_TRACK_DEFAULT
;
1546 track
->flags
&= ~MATROSKA_TRACK_DEFAULT
;
1550 /* lacing (like MPEG, where blocks don't end/start on frame
1552 case MATROSKA_ID_TRACKFLAGLACING
: {
1554 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1557 track
->flags
|= MATROSKA_TRACK_LACING
;
1559 track
->flags
&= ~MATROSKA_TRACK_LACING
;
1563 /* default length (in time) of one data block in this track */
1564 case MATROSKA_ID_TRACKDEFAULTDURATION
: {
1566 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
1568 track
->default_duration
= num
;
1573 av_log(matroska
->ctx
, AV_LOG_INFO
,
1574 "Unknown track header entry 0x%x - ignoring\n", id
);
1578 /* we ignore these because they're nothing useful. */
1579 case MATROSKA_ID_CODECINFOURL
:
1580 case MATROSKA_ID_CODECDOWNLOADURL
:
1581 case MATROSKA_ID_TRACKMINCACHE
:
1582 case MATROSKA_ID_TRACKMAXCACHE
:
1583 res
= ebml_read_skip(matroska
);
1587 if (matroska
->level_up
) {
1588 matroska
->level_up
--;
1597 matroska_parse_tracks (MatroskaDemuxContext
*matroska
)
1602 av_log(matroska
->ctx
, AV_LOG_DEBUG
, "parsing tracks...\n");
1605 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1608 } else if (matroska
->level_up
) {
1609 matroska
->level_up
--;
1614 /* one track within the "all-tracks" header */
1615 case MATROSKA_ID_TRACKENTRY
:
1616 res
= matroska_add_stream(matroska
);
1620 av_log(matroska
->ctx
, AV_LOG_INFO
,
1621 "Unknown entry 0x%x in track header\n", id
);
1625 res
= ebml_read_skip(matroska
);
1629 if (matroska
->level_up
) {
1630 matroska
->level_up
--;
1639 matroska_parse_index (MatroskaDemuxContext
*matroska
)
1643 MatroskaDemuxIndex idx
;
1645 av_log(matroska
->ctx
, AV_LOG_DEBUG
, "parsing index...\n");
1648 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1651 } else if (matroska
->level_up
) {
1652 matroska
->level_up
--;
1657 /* one single index entry ('point') */
1658 case MATROSKA_ID_POINTENTRY
:
1659 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
1662 /* in the end, we hope to fill one entry with a
1663 * timestamp, a file position and a tracknum */
1664 idx
.pos
= (uint64_t) -1;
1665 idx
.time
= (uint64_t) -1;
1666 idx
.track
= (uint16_t) -1;
1669 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1672 } else if (matroska
->level_up
) {
1673 matroska
->level_up
--;
1678 /* one single index entry ('point') */
1679 case MATROSKA_ID_CUETIME
: {
1681 if ((res
= ebml_read_uint(matroska
, &id
,
1684 idx
.time
= time
* matroska
->time_scale
;
1688 /* position in the file + track to which it
1690 case MATROSKA_ID_CUETRACKPOSITION
:
1691 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
1695 if (!(id
= ebml_peek_id (matroska
,
1696 &matroska
->level_up
))) {
1699 } else if (matroska
->level_up
) {
1700 matroska
->level_up
--;
1706 case MATROSKA_ID_CUETRACK
: {
1708 if ((res
= ebml_read_uint(matroska
,
1715 /* position in file */
1716 case MATROSKA_ID_CUECLUSTERPOSITION
: {
1718 if ((res
= ebml_read_uint(matroska
,
1726 av_log(matroska
->ctx
, AV_LOG_INFO
,
1727 "Unknown entry 0x%x in "
1728 "CuesTrackPositions\n", id
);
1732 res
= ebml_read_skip(matroska
);
1736 if (matroska
->level_up
) {
1737 matroska
->level_up
--;
1745 av_log(matroska
->ctx
, AV_LOG_INFO
,
1746 "Unknown entry 0x%x in cuespoint "
1751 res
= ebml_read_skip(matroska
);
1755 if (matroska
->level_up
) {
1756 matroska
->level_up
--;
1761 /* so let's see if we got what we wanted */
1762 if (idx
.pos
!= (uint64_t) -1 &&
1763 idx
.time
!= (uint64_t) -1 &&
1764 idx
.track
!= (uint16_t) -1) {
1765 if (matroska
->num_indexes
% 32 == 0) {
1766 /* re-allocate bigger index */
1768 av_realloc(matroska
->index
,
1769 (matroska
->num_indexes
+ 32) *
1770 sizeof(MatroskaDemuxIndex
));
1772 matroska
->index
[matroska
->num_indexes
] = idx
;
1773 matroska
->num_indexes
++;
1778 av_log(matroska
->ctx
, AV_LOG_INFO
,
1779 "Unknown entry 0x%x in cues header\n", id
);
1783 res
= ebml_read_skip(matroska
);
1787 if (matroska
->level_up
) {
1788 matroska
->level_up
--;
1797 matroska_parse_metadata (MatroskaDemuxContext
*matroska
)
1803 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1806 } else if (matroska
->level_up
) {
1807 matroska
->level_up
--;
1812 /* Hm, this is unsupported... */
1814 av_log(matroska
->ctx
, AV_LOG_INFO
,
1815 "Unknown entry 0x%x in metadata header\n", id
);
1819 res
= ebml_read_skip(matroska
);
1823 if (matroska
->level_up
) {
1824 matroska
->level_up
--;
1833 matroska_parse_seekhead (MatroskaDemuxContext
*matroska
)
1838 av_log(matroska
->ctx
, AV_LOG_DEBUG
, "parsing seekhead...\n");
1841 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1844 } else if (matroska
->level_up
) {
1845 matroska
->level_up
--;
1850 case MATROSKA_ID_SEEKENTRY
: {
1851 uint32_t seek_id
= 0, peek_id_cache
= 0;
1852 uint64_t seek_pos
= (uint64_t) -1, t
;
1854 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
1858 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
1861 } else if (matroska
->level_up
) {
1862 matroska
->level_up
--;
1867 case MATROSKA_ID_SEEKID
:
1868 res
= ebml_read_uint(matroska
, &id
, &t
);
1872 case MATROSKA_ID_SEEKPOSITION
:
1873 res
= ebml_read_uint(matroska
, &id
, &seek_pos
);
1877 av_log(matroska
->ctx
, AV_LOG_INFO
,
1878 "Unknown seekhead ID 0x%x\n", id
);
1882 res
= ebml_read_skip(matroska
);
1886 if (matroska
->level_up
) {
1887 matroska
->level_up
--;
1892 if (!seek_id
|| seek_pos
== (uint64_t) -1) {
1893 av_log(matroska
->ctx
, AV_LOG_INFO
,
1894 "Incomplete seekhead entry (0x%x/%llu)\n",
1900 case MATROSKA_ID_CUES
:
1901 case MATROSKA_ID_TAGS
: {
1902 uint32_t level_up
= matroska
->level_up
;
1903 offset_t before_pos
;
1905 MatroskaLevel level
;
1907 /* remember the peeked ID and the current position */
1908 peek_id_cache
= matroska
->peek_id
;
1909 before_pos
= url_ftell(&matroska
->ctx
->pb
);
1912 if ((res
= ebml_read_seek(matroska
, seek_pos
+
1913 matroska
->segment_start
)) < 0)
1916 /* we don't want to lose our seekhead level, so we add
1917 * a dummy. This is a crude hack. */
1918 if (matroska
->num_levels
== EBML_MAX_DEPTH
) {
1919 av_log(matroska
->ctx
, AV_LOG_INFO
,
1920 "Max EBML element depth (%d) reached, "
1921 "cannot parse further.\n", EBML_MAX_DEPTH
);
1922 return AVERROR_UNKNOWN
;
1926 level
.length
= (uint64_t)-1;
1927 matroska
->levels
[matroska
->num_levels
] = level
;
1928 matroska
->num_levels
++;
1931 if (!(id
= ebml_peek_id (matroska
,
1932 &matroska
->level_up
)))
1934 if (id
!= seek_id
) {
1935 av_log(matroska
->ctx
, AV_LOG_INFO
,
1936 "We looked for ID=0x%x but got "
1937 "ID=0x%x (pos=%llu)",
1938 seek_id
, id
, seek_pos
+
1939 matroska
->segment_start
);
1943 /* read master + parse */
1944 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
1947 case MATROSKA_ID_CUES
:
1948 if (!(res
= matroska_parse_index(matroska
)) ||
1949 url_feof(&matroska
->ctx
->pb
)) {
1950 matroska
->index_parsed
= 1;
1954 case MATROSKA_ID_TAGS
:
1955 if (!(res
= matroska_parse_metadata(matroska
)) ||
1956 url_feof(&matroska
->ctx
->pb
)) {
1957 matroska
->metadata_parsed
= 1;
1966 /* remove dummy level */
1967 while (matroska
->num_levels
) {
1968 matroska
->num_levels
--;
1970 matroska
->levels
[matroska
->num_levels
].length
;
1971 if (length
== (uint64_t)-1)
1976 if ((res
= ebml_read_seek(matroska
, before_pos
)) < 0)
1978 matroska
->peek_id
= peek_id_cache
;
1979 matroska
->level_up
= level_up
;
1984 av_log(matroska
->ctx
, AV_LOG_INFO
,
1985 "Ignoring seekhead entry for ID=0x%x\n",
1994 av_log(matroska
->ctx
, AV_LOG_INFO
,
1995 "Unknown seekhead ID 0x%x\n", id
);
1999 res
= ebml_read_skip(matroska
);
2003 if (matroska
->level_up
) {
2004 matroska
->level_up
--;
2013 matroska_read_header (AVFormatContext
*s
,
2014 AVFormatParameters
*ap
)
2016 MatroskaDemuxContext
*matroska
= s
->priv_data
;
2018 int version
, last_level
, res
= 0;
2023 /* First read the EBML header. */
2025 if ((res
= ebml_read_header(matroska
, &doctype
, &version
)) < 0)
2027 if ((doctype
== NULL
) || strcmp(doctype
, "matroska")) {
2028 av_log(matroska
->ctx
, AV_LOG_ERROR
,
2029 "Wrong EBML doctype ('%s' != 'matroska').\n",
2030 doctype ? doctype
: "(none)");
2033 return AVERROR_NOFMT
;
2037 av_log(matroska
->ctx
, AV_LOG_ERROR
,
2038 "Matroska demuxer version 1 too old for file version %d\n",
2040 return AVERROR_NOFMT
;
2043 /* The next thing is a segment. */
2045 if (!(id
= ebml_peek_id(matroska
, &last_level
)))
2047 if (id
== MATROSKA_ID_SEGMENT
)
2051 av_log(matroska
->ctx
, AV_LOG_INFO
,
2052 "Expected a Segment ID (0x%x), but received 0x%x!\n",
2053 MATROSKA_ID_SEGMENT
, id
);
2054 if ((res
= ebml_read_skip(matroska
)) < 0)
2058 /* We now have a Matroska segment.
2059 * Seeks are from the beginning of the segment,
2060 * after the segment ID/length. */
2061 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2063 matroska
->segment_start
= url_ftell(&s
->pb
);
2065 matroska
->time_scale
= 1000000;
2066 /* we've found our segment, start reading the different contents in here */
2068 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
2071 } else if (matroska
->level_up
) {
2072 matroska
->level_up
--;
2078 case MATROSKA_ID_INFO
: {
2079 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2081 res
= matroska_parse_info(matroska
);
2085 /* track info headers */
2086 case MATROSKA_ID_TRACKS
: {
2087 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2089 res
= matroska_parse_tracks(matroska
);
2094 case MATROSKA_ID_CUES
: {
2095 if (!matroska
->index_parsed
) {
2096 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2098 res
= matroska_parse_index(matroska
);
2100 res
= ebml_read_skip(matroska
);
2105 case MATROSKA_ID_TAGS
: {
2106 if (!matroska
->metadata_parsed
) {
2107 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2109 res
= matroska_parse_metadata(matroska
);
2111 res
= ebml_read_skip(matroska
);
2115 /* file index (if seekable, seek to Cues/Tags to parse it) */
2116 case MATROSKA_ID_SEEKHEAD
: {
2117 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2119 res
= matroska_parse_seekhead(matroska
);
2123 case MATROSKA_ID_CLUSTER
: {
2124 /* Do not read the master - this will be done in the next
2125 * call to matroska_read_packet. */
2131 av_log(matroska
->ctx
, AV_LOG_INFO
,
2132 "Unknown matroska file header ID 0x%x\n", id
);
2136 res
= ebml_read_skip(matroska
);
2140 if (matroska
->level_up
) {
2141 matroska
->level_up
--;
2149 /* Have we found a cluster? */
2152 enum CodecID codec_id
;
2153 MatroskaTrack
*track
;
2155 void *extradata
= NULL
;
2156 int extradata_size
= 0;
2158 for (i
= 0; i
< matroska
->num_tracks
; i
++) {
2159 track
= matroska
->tracks
[i
];
2161 /* libavformat does not really support subtitles.
2162 * Also apply some sanity checks. */
2163 if ((track
->type
== MATROSKA_TRACK_TYPE_SUBTITLE
) ||
2164 (track
->codec_id
== NULL
))
2167 /* Set the FourCC from the CodecID. */
2168 /* This is the MS compatibility mode which stores a
2169 * BITMAPINFOHEADER in the CodecPrivate. */
2170 if (!strcmp(track
->codec_id
,
2171 MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC
) &&
2172 (track
->codec_priv_size
>= 40) &&
2173 (track
->codec_priv
!= NULL
)) {
2176 /* Offset of biCompression. Stored in LE. */
2177 p
= (unsigned char *)track
->codec_priv
+ 16;
2178 ((MatroskaVideoTrack
*)track
)->fourcc
= (p
[3] << 24) |
2179 (p
[2] << 16) | (p
[1] << 8) | p
[0];
2180 codec_id
= codec_get_bmp_id(((MatroskaVideoTrack
*)track
)->fourcc
);
2182 } else if (!strcmp(track
->codec_id
,
2183 MATROSKA_CODEC_ID_VIDEO_MPEG4_SP
) ||
2184 !strcmp(track
->codec_id
,
2185 MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP
) ||
2186 !strcmp(track
->codec_id
,
2187 MATROSKA_CODEC_ID_VIDEO_MPEG4_AP
))
2188 codec_id
= CODEC_ID_MPEG4
;
2189 else if (!strcmp(track
->codec_id
,
2190 MATROSKA_CODEC_ID_VIDEO_MPEG4_AVC
))
2191 codec_id
= CODEC_ID_H264
;
2192 /* else if (!strcmp(track->codec_id, */
2193 /* MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED)) */
2194 /* codec_id = CODEC_ID_???; */
2195 else if (!strcmp(track
->codec_id
,
2196 MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3
))
2197 codec_id
= CODEC_ID_MSMPEG4V3
;
2198 else if (!strcmp(track
->codec_id
,
2199 MATROSKA_CODEC_ID_VIDEO_MPEG1
) ||
2200 !strcmp(track
->codec_id
,
2201 MATROSKA_CODEC_ID_VIDEO_MPEG2
))
2202 codec_id
= CODEC_ID_MPEG2VIDEO
;
2204 /* This is the MS compatibility mode which stores a
2205 * WAVEFORMATEX in the CodecPrivate. */
2206 else if (!strcmp(track
->codec_id
,
2207 MATROSKA_CODEC_ID_AUDIO_ACM
) &&
2208 (track
->codec_priv_size
>= 18) &&
2209 (track
->codec_priv
!= NULL
)) {
2213 /* Offset of wFormatTag. Stored in LE. */
2214 p
= (unsigned char *)track
->codec_priv
;
2215 tag
= (p
[1] << 8) | p
[0];
2216 codec_id
= codec_get_wav_id(tag
);
2218 } else if (!strcmp(track
->codec_id
,
2219 MATROSKA_CODEC_ID_AUDIO_MPEG1_L1
) ||
2220 !strcmp(track
->codec_id
,
2221 MATROSKA_CODEC_ID_AUDIO_MPEG1_L2
) ||
2222 !strcmp(track
->codec_id
,
2223 MATROSKA_CODEC_ID_AUDIO_MPEG1_L3
))
2224 codec_id
= CODEC_ID_MP3
;
2225 else if (!strcmp(track
->codec_id
,
2226 MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE
))
2227 codec_id
= CODEC_ID_PCM_U16BE
;
2228 else if (!strcmp(track
->codec_id
,
2229 MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE
))
2230 codec_id
= CODEC_ID_PCM_U16LE
;
2231 /* else if (!strcmp(track->codec_id, */
2232 /* MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT)) */
2233 /* codec_id = CODEC_ID_PCM_???; */
2234 else if (!strcmp(track
->codec_id
,
2235 MATROSKA_CODEC_ID_AUDIO_AC3
))
2236 codec_id
= CODEC_ID_AC3
;
2237 else if (!strcmp(track
->codec_id
,
2238 MATROSKA_CODEC_ID_AUDIO_DTS
))
2239 codec_id
= CODEC_ID_DTS
;
2240 /* No such codec id so far. */
2241 /* else if (!strcmp(track->codec_id, */
2242 /* MATROSKA_CODEC_ID_AUDIO_DTS)) */
2243 /* codec_id = CODEC_ID_DTS; */
2244 else if (!strcmp(track
->codec_id
,
2245 MATROSKA_CODEC_ID_AUDIO_VORBIS
)) {
2246 extradata_size
= track
->codec_priv_size
;
2247 if(extradata_size
) {
2248 extradata
= av_malloc(extradata_size
);
2249 if(extradata
== NULL
)
2250 return AVERROR_NOMEM
;
2251 memcpy(extradata
, track
->codec_priv
, extradata_size
);
2253 codec_id
= CODEC_ID_VORBIS
;
2254 } else if (!strcmp(track
->codec_id
,
2255 MATROSKA_CODEC_ID_AUDIO_MPEG2
) ||
2256 !strcmp(track
->codec_id
,
2257 MATROSKA_CODEC_ID_AUDIO_MPEG4
))
2258 codec_id
= CODEC_ID_AAC
;
2260 codec_id
= CODEC_ID_NONE
;
2262 if (codec_id
== CODEC_ID_NONE
) {
2263 av_log(matroska
->ctx
, AV_LOG_INFO
,
2264 "Unknown/unsupported CodecID %s.\n",
2268 track
->stream_index
= matroska
->num_streams
;
2270 matroska
->num_streams
++;
2271 st
= av_new_stream(s
, track
->stream_index
);
2273 return AVERROR_NOMEM
;
2274 av_set_pts_info(st
, 24, 1, 1000); /* 24 bit pts in ms */
2276 st
->codec
->codec_id
= codec_id
;
2279 st
->codec
->extradata
= extradata
;
2280 st
->codec
->extradata_size
= extradata_size
;
2281 } else if(track
->codec_priv
&& track
->codec_priv_size
> 0){
2282 st
->codec
->extradata
= av_malloc(track
->codec_priv_size
);
2283 if(st
->codec
->extradata
== NULL
)
2284 return AVERROR_NOMEM
;
2285 st
->codec
->extradata_size
= track
->codec_priv_size
;
2286 memcpy(st
->codec
->extradata
, track
->codec_priv
,
2287 track
->codec_priv_size
);
2290 if (track
->type
== MATROSKA_TRACK_TYPE_VIDEO
) {
2291 MatroskaVideoTrack
*videotrack
= (MatroskaVideoTrack
*)track
;
2293 st
->codec
->codec_type
= CODEC_TYPE_VIDEO
;
2294 st
->codec
->codec_tag
= videotrack
->fourcc
;
2295 st
->codec
->width
= videotrack
->pixel_width
;
2296 st
->codec
->height
= videotrack
->pixel_height
;
2297 if (videotrack
->display_width
== 0)
2298 st
->codec
->sample_aspect_ratio
.num
=
2299 videotrack
->pixel_width
;
2301 st
->codec
->sample_aspect_ratio
.num
=
2302 videotrack
->display_width
;
2303 if (videotrack
->display_height
== 0)
2304 st
->codec
->sample_aspect_ratio
.num
=
2305 videotrack
->pixel_height
;
2307 st
->codec
->sample_aspect_ratio
.num
=
2308 videotrack
->display_height
;
2310 } else if (track
->type
== MATROSKA_TRACK_TYPE_AUDIO
) {
2311 MatroskaAudioTrack
*audiotrack
= (MatroskaAudioTrack
*)track
;
2313 st
->codec
->codec_type
= CODEC_TYPE_AUDIO
;
2314 st
->codec
->sample_rate
= audiotrack
->samplerate
;
2315 st
->codec
->channels
= audiotrack
->channels
;
2318 /* What do we do with private data? E.g. for Vorbis. */
2326 matroska_find_track_by_num (MatroskaDemuxContext
*matroska
,
2331 for (i
= 0; i
< matroska
->num_tracks
; i
++)
2332 if (matroska
->tracks
[i
]->num
== num
)
2339 matroska_parse_blockgroup (MatroskaDemuxContext
*matroska
,
2340 uint64_t cluster_time
)
2345 int is_keyframe
= PKT_FLAG_KEY
, last_num_packets
= matroska
->num_packets
;
2347 av_log(matroska
->ctx
, AV_LOG_DEBUG
, "parsing blockgroup...\n");
2350 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
2353 } else if (matroska
->level_up
) {
2354 matroska
->level_up
--;
2359 /* one block inside the group. Note, block parsing is one
2360 * of the harder things, so this code is a bit complicated.
2361 * See http://www.matroska.org/ for documentation. */
2362 case MATROSKA_ID_BLOCK
: {
2363 uint8_t *data
, *origdata
;
2366 uint32_t *lace_size
= NULL
;
2367 int n
, track
, flags
, laces
= 0;
2369 int64_t pos
= url_ftell(&matroska
->ctx
->pb
);
2371 if ((res
= ebml_read_binary(matroska
, &id
, &data
, &size
)) < 0)
2375 /* first byte(s): blocknum */
2376 if ((n
= matroska_ebmlnum_uint(data
, size
, &num
)) < 0) {
2377 av_log(matroska
->ctx
, AV_LOG_ERROR
,
2378 "EBML block data error\n");
2385 /* fetch track from num */
2386 track
= matroska_find_track_by_num(matroska
, num
);
2387 if (size
<= 3 || track
< 0 || track
>= matroska
->num_tracks
) {
2388 av_log(matroska
->ctx
, AV_LOG_INFO
,
2389 "Invalid stream %d or size %u\n", track
, size
);
2393 if(matroska
->ctx
->streams
[ matroska
->tracks
[track
]->stream_index
]->discard
>= AVDISCARD_ALL
){
2398 /* time (relative to cluster time) */
2399 time
= ((data
[0] << 8) | data
[1]) * matroska
->time_scale
;
2405 switch ((flags
& 0x06) >> 1) {
2406 case 0x0: /* no lacing */
2408 lace_size
= av_mallocz(sizeof(int));
2409 lace_size
[0] = size
;
2412 case 0x1: /* xiph lacing */
2413 case 0x2: /* fixed-size lacing */
2414 case 0x3: /* EBML lacing */
2419 laces
= (*data
) + 1;
2422 lace_size
= av_mallocz(laces
* sizeof(int));
2424 switch ((flags
& 0x06) >> 1) {
2425 case 0x1: /* xiph lacing */ {
2428 for (n
= 0; res
== 0 && n
< laces
- 1; n
++) {
2435 lace_size
[n
] += temp
;
2441 total
+= lace_size
[n
];
2443 lace_size
[n
] = size
- total
;
2447 case 0x2: /* fixed-size lacing */
2448 for (n
= 0; n
< laces
; n
++)
2449 lace_size
[n
] = size
/ laces
;
2452 case 0x3: /* EBML lacing */ {
2454 n
= matroska_ebmlnum_uint(data
, size
, &num
);
2456 av_log(matroska
->ctx
, AV_LOG_INFO
,
2457 "EBML block data error\n");
2462 total
= lace_size
[0] = num
;
2463 for (n
= 1; res
== 0 && n
< laces
- 1; n
++) {
2466 r
= matroska_ebmlnum_sint (data
, size
,
2469 av_log(matroska
->ctx
, AV_LOG_INFO
,
2470 "EBML block data error\n");
2475 lace_size
[n
] = lace_size
[n
- 1] + snum
;
2476 total
+= lace_size
[n
];
2478 lace_size
[n
] = size
- total
;
2486 for (n
= 0; n
< laces
; n
++) {
2487 uint64_t timecode
= 0;
2489 pkt
= av_mallocz(sizeof(AVPacket
));
2490 /* XXX: prevent data copy... */
2491 if (av_new_packet(pkt
,lace_size
[n
]) < 0) {
2492 res
= AVERROR_NOMEM
;
2495 if (cluster_time
!= (uint64_t)-1) {
2496 if (time
< 0 && (-time
) > cluster_time
)
2497 timecode
= cluster_time
;
2499 timecode
= cluster_time
+ time
;
2501 /* FIXME: duration */
2503 memcpy(pkt
->data
, data
, lace_size
[n
]);
2504 data
+= lace_size
[n
];
2506 pkt
->flags
= is_keyframe
;
2508 matroska
->tracks
[track
]->stream_index
;
2510 pkt
->pts
= timecode
/ 1000000; /* ns to ms */
2513 matroska_queue_packet(matroska
, pkt
);
2522 case MATROSKA_ID_BLOCKDURATION
: {
2524 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
2526 av_log(matroska
->ctx
, AV_LOG_INFO
,
2527 "FIXME: implement support for BlockDuration\n");
2531 case MATROSKA_ID_BLOCKREFERENCE
:
2532 /* We've found a reference, so not even the first frame in
2533 * the lace is a key frame. */
2535 if (last_num_packets
!= matroska
->num_packets
)
2536 matroska
->packets
[last_num_packets
]->flags
= 0;
2537 res
= ebml_read_skip(matroska
);
2541 av_log(matroska
->ctx
, AV_LOG_INFO
,
2542 "Unknown entry 0x%x in blockgroup data\n", id
);
2546 res
= ebml_read_skip(matroska
);
2550 if (matroska
->level_up
) {
2551 matroska
->level_up
--;
2560 matroska_parse_cluster (MatroskaDemuxContext
*matroska
)
2564 uint64_t cluster_time
= 0;
2566 av_log(matroska
->ctx
, AV_LOG_DEBUG
,
2567 "parsing cluster at %lld\n", url_ftell(&matroska
->ctx
->pb
));
2570 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
2573 } else if (matroska
->level_up
) {
2574 matroska
->level_up
--;
2579 /* cluster timecode */
2580 case MATROSKA_ID_CLUSTERTIMECODE
: {
2582 if ((res
= ebml_read_uint(matroska
, &id
, &num
)) < 0)
2584 cluster_time
= num
* matroska
->time_scale
;
2588 /* a group of blocks inside a cluster */
2589 case MATROSKA_ID_BLOCKGROUP
:
2590 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2592 res
= matroska_parse_blockgroup(matroska
, cluster_time
);
2596 av_log(matroska
->ctx
, AV_LOG_INFO
,
2597 "Unknown entry 0x%x in cluster data\n", id
);
2601 res
= ebml_read_skip(matroska
);
2605 if (matroska
->level_up
) {
2606 matroska
->level_up
--;
2615 matroska_read_packet (AVFormatContext
*s
,
2618 MatroskaDemuxContext
*matroska
= s
->priv_data
;
2622 /* Do we still have a packet queued? */
2623 if (matroska_deliver_packet(matroska
, pkt
) == 0)
2626 /* Have we already reached the end? */
2631 if (!(id
= ebml_peek_id(matroska
, &matroska
->level_up
))) {
2634 } else if (matroska
->level_up
) {
2635 matroska
->level_up
--;
2640 case MATROSKA_ID_CLUSTER
:
2641 if ((res
= ebml_read_master(matroska
, &id
)) < 0)
2643 if ((res
= matroska_parse_cluster(matroska
)) == 0)
2644 res
= 1; /* Parsed one cluster, let's get out. */
2649 res
= ebml_read_skip(matroska
);
2653 if (matroska
->level_up
) {
2654 matroska
->level_up
--;
2662 return matroska_deliver_packet(matroska
, pkt
);
2666 matroska_read_close (AVFormatContext
*s
)
2668 MatroskaDemuxContext
*matroska
= s
->priv_data
;
2671 if (matroska
->writing_app
)
2672 av_free(matroska
->writing_app
);
2673 if (matroska
->muxing_app
)
2674 av_free(matroska
->muxing_app
);
2675 if (matroska
->index
)
2676 av_free(matroska
->index
);
2678 if (matroska
->packets
!= NULL
) {
2679 for (n
= 0; n
< matroska
->num_packets
; n
++) {
2680 av_free_packet(matroska
->packets
[n
]);
2681 av_free(matroska
->packets
[n
]);
2683 av_free(matroska
->packets
);
2686 for (n
= 0; n
< matroska
->num_tracks
; n
++) {
2687 MatroskaTrack
*track
= matroska
->tracks
[n
];
2688 if (track
->codec_id
)
2689 av_free(track
->codec_id
);
2690 if (track
->codec_name
)
2691 av_free(track
->codec_name
);
2692 if (track
->codec_priv
)
2693 av_free(track
->codec_priv
);
2695 av_free(track
->name
);
2696 if (track
->language
)
2697 av_free(track
->language
);
2702 for (n
= 0; n
< s
->nb_streams
; n
++) {
2703 av_free(s
->streams
[n
]->codec
->extradata
);
2706 memset(matroska
, 0, sizeof(MatroskaDemuxContext
));
2711 static AVInputFormat matroska_iformat
= {
2713 "Matroska file format",
2714 sizeof(MatroskaDemuxContext
),
2716 matroska_read_header
,
2717 matroska_read_packet
,
2718 matroska_read_close
,
2724 av_register_input_format(&matroska_iformat
);