cea0ee04f1cf5714b2aa2157c860f70670235f93
[libav.git] / libavformat / id3v2.c
1 /*
2 * ID3v2 header parser
3 * Copyright (c) 2003 Fabrice Bellard
4 *
5 * This file is part of Libav.
6 *
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "id3v2.h"
23 #include "id3v1.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/intreadwrite.h"
26 #include "libavutil/dict.h"
27 #include "avio_internal.h"
28
/**
 * Check whether a 10-byte buffer looks like an ID3v2 header.
 *
 * @param buf   at least 10 readable bytes
 * @param magic at least 3 readable characters to compare with buf[0..2]
 * @return 1 if the first three bytes equal magic, neither buf[3] nor buf[4]
 *         is 0xff, and none of buf[6..9] has its top bit set; 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf, const char * magic)
{
    int i;

    /* identifier bytes */
    for (i = 0; i < 3; i++)
        if (buf[i] != magic[i])
            return 0;

    /* bytes 3 and 4 must not be 0xff */
    if (buf[3] == 0xff || buf[4] == 0xff)
        return 0;

    /* the four size bytes must all have bit 7 clear */
    for (i = 6; i <= 9; i++)
        if (buf[i] & 0x80)
            return 0;

    return 1;
}
41
42 int ff_id3v2_tag_len(const uint8_t * buf)
43 {
44 int len = ((buf[6] & 0x7f) << 21) +
45 ((buf[7] & 0x7f) << 14) +
46 ((buf[8] & 0x7f) << 7) +
47 (buf[9] & 0x7f) +
48 ID3v2_HEADER_SIZE;
49 if (buf[5] & 0x10)
50 len += ID3v2_HEADER_SIZE;
51 return len;
52 }
53
54 static unsigned int get_size(AVIOContext *s, int len)
55 {
56 int v = 0;
57 while (len--)
58 v = (v << 7) + (avio_r8(s) & 0x7F);
59 return v;
60 }
61
62 /**
63 * Free GEOB type extra metadata.
64 */
65 static void free_geobtag(ID3v2ExtraMetaGEOB *geob)
66 {
67 av_free(geob->mime_type);
68 av_free(geob->file_name);
69 av_free(geob->description);
70 av_free(geob->data);
71 av_free(geob);
72 }
73
74 /**
75 * Decode characters to UTF-8 according to encoding type. The decoded buffer is
76 * always null terminated. Stop reading when either *maxread bytes are read from
77 * pb or U+0000 character is found.
78 *
79 * @param dst Pointer where the address of the buffer with the decoded bytes is
80 * stored. Buffer must be freed by caller.
81 * @param maxread Pointer to maximum number of characters to read from the
82 * AVIOContext. After execution the value is decremented by the number of bytes
83 * actually read.
84 * @returns 0 if no error occured, dst is uninitialized on error
85 */
86 static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding,
87 uint8_t **dst, int *maxread)
88 {
89 int ret;
90 uint8_t tmp;
91 uint32_t ch = 1;
92 int left = *maxread;
93 unsigned int (*get)(AVIOContext*) = avio_rb16;
94 AVIOContext *dynbuf;
95
96 if ((ret = avio_open_dyn_buf(&dynbuf)) < 0) {
97 av_log(s, AV_LOG_ERROR, "Error opening memory stream\n");
98 return ret;
99 }
100
101 switch (encoding) {
102
103 case ID3v2_ENCODING_ISO8859:
104 while (left && ch) {
105 ch = avio_r8(pb);
106 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
107 left--;
108 }
109 break;
110
111 case ID3v2_ENCODING_UTF16BOM:
112 if ((left -= 2) < 0) {
113 av_log(s, AV_LOG_ERROR, "Cannot read BOM value, input too short\n");
114 avio_close_dyn_buf(dynbuf, dst);
115 av_freep(dst);
116 return AVERROR_INVALIDDATA;
117 }
118 switch (avio_rb16(pb)) {
119 case 0xfffe:
120 get = avio_rl16;
121 case 0xfeff:
122 break;
123 default:
124 av_log(s, AV_LOG_ERROR, "Incorrect BOM value\n");
125 avio_close_dyn_buf(dynbuf, dst);
126 av_freep(dst);
127 *maxread = left;
128 return AVERROR_INVALIDDATA;
129 }
130 // fall-through
131
132 case ID3v2_ENCODING_UTF16BE:
133 while ((left > 1) && ch) {
134 GET_UTF16(ch, ((left -= 2) >= 0 ? get(pb) : 0), break;)
135 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
136 }
137 if (left < 0)
138 left += 2; /* did not read last char from pb */
139 break;
140
141 case ID3v2_ENCODING_UTF8:
142 while (left && ch) {
143 ch = avio_r8(pb);
144 avio_w8(dynbuf, ch);
145 left--;
146 }
147 break;
148 default:
149 av_log(s, AV_LOG_WARNING, "Unknown encoding\n");
150 }
151
152 if (ch)
153 avio_w8(dynbuf, 0);
154
155 avio_close_dyn_buf(dynbuf, dst);
156 *maxread = left;
157
158 return 0;
159 }
160
161 /**
162 * Parse a text tag.
163 */
164 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key)
165 {
166 uint8_t *dst;
167 int encoding, dict_flags = AV_DICT_DONT_OVERWRITE;
168 unsigned genre;
169
170 if (taglen < 1)
171 return;
172
173 encoding = avio_r8(pb);
174 taglen--; /* account for encoding type byte */
175
176 if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
177 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
178 return;
179 }
180
181 if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
182 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
183 && genre <= ID3v1_GENRE_MAX) {
184 av_freep(&dst);
185 dst = ff_id3v1_genre_str[genre];
186 } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
187 /* dst now contains the key, need to get value */
188 key = dst;
189 if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
190 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
191 av_freep(&key);
192 return;
193 }
194 dict_flags |= AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_STRDUP_KEY;
195 }
196 else if (*dst)
197 dict_flags |= AV_DICT_DONT_STRDUP_VAL;
198
199 if (dst)
200 av_dict_set(&s->metadata, key, dst, dict_flags);
201 }
202
203 /**
204 * Parse GEOB tag into a ID3v2ExtraMetaGEOB struct.
205 */
206 static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, char *tag, ID3v2ExtraMeta **extra_meta)
207 {
208 ID3v2ExtraMetaGEOB *geob_data = NULL;
209 ID3v2ExtraMeta *new_extra = NULL;
210 char encoding;
211 unsigned int len;
212
213 if (taglen < 1)
214 return;
215
216 geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB));
217 if (!geob_data) {
218 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMetaGEOB));
219 return;
220 }
221
222 new_extra = av_mallocz(sizeof(ID3v2ExtraMeta));
223 if (!new_extra) {
224 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMeta));
225 goto fail;
226 }
227
228 /* read encoding type byte */
229 encoding = avio_r8(pb);
230 taglen--;
231
232 /* read MIME type (always ISO-8859) */
233 if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type, &taglen) < 0
234 || taglen <= 0)
235 goto fail;
236
237 /* read file name */
238 if (decode_str(s, pb, encoding, &geob_data->file_name, &taglen) < 0
239 || taglen <= 0)
240 goto fail;
241
242 /* read content description */
243 if (decode_str(s, pb, encoding, &geob_data->description, &taglen) < 0
244 || taglen < 0)
245 goto fail;
246
247 if (taglen) {
248 /* save encapsulated binary data */
249 geob_data->data = av_malloc(taglen);
250 if (!geob_data->data) {
251 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen);
252 goto fail;
253 }
254 if ((len = avio_read(pb, geob_data->data, taglen)) < taglen)
255 av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data truncated.\n");
256 geob_data->datasize = len;
257 } else {
258 geob_data->data = NULL;
259 geob_data->datasize = 0;
260 }
261
262 /* add data to the list */
263 new_extra->tag = "GEOB";
264 new_extra->data = geob_data;
265 new_extra->next = *extra_meta;
266 *extra_meta = new_extra;
267
268 return;
269
270 fail:
271 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", tag);
272 free_geobtag(geob_data);
273 av_free(new_extra);
274 return;
275 }
276
/**
 * Tell whether str consists solely of the characters '0'..'9'.
 * An empty string counts as a number.
 *
 * @return 1 if every character is a decimal digit, 0 otherwise
 */
static int is_number(const char *str)
{
    const char *p;

    for (p = str; *p; p++)
        if (*p < '0' || *p > '9')
            return 0;

    return 1;
}
282
283 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag)
284 {
285 AVDictionaryEntry *t;
286 if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
287 strlen(t->value) == 4 && is_number(t->value))
288 return t;
289 return NULL;
290 }
291
292 static void merge_date(AVDictionary **m)
293 {
294 AVDictionaryEntry *t;
295 char date[17] = {0}; // YYYY-MM-DD hh:mm
296
297 if (!(t = get_date_tag(*m, "TYER")) &&
298 !(t = get_date_tag(*m, "TYE")))
299 return;
300 av_strlcpy(date, t->value, 5);
301 av_dict_set(m, "TYER", NULL, 0);
302 av_dict_set(m, "TYE", NULL, 0);
303
304 if (!(t = get_date_tag(*m, "TDAT")) &&
305 !(t = get_date_tag(*m, "TDA")))
306 goto finish;
307 snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
308 av_dict_set(m, "TDAT", NULL, 0);
309 av_dict_set(m, "TDA", NULL, 0);
310
311 if (!(t = get_date_tag(*m, "TIME")) &&
312 !(t = get_date_tag(*m, "TIM")))
313 goto finish;
314 snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2);
315 av_dict_set(m, "TIME", NULL, 0);
316 av_dict_set(m, "TIM", NULL, 0);
317
318 finish:
319 if (date[0])
320 av_dict_set(m, "date", date, 0);
321 }
322
323 /**
324 * Get the corresponding ID3v2EMFunc struct for a tag.
325 * @param isv34 Determines if v2.2 or v2.3/4 strings are used
326 * @return A pointer to the ID3v2EMFunc struct if found, NULL otherwise.
327 */
328 static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34)
329 {
330 int i = 0;
331 while (ff_id3v2_extra_meta_funcs[i].tag3) {
332 if (!memcmp(tag,
333 (isv34 ?
334 ff_id3v2_extra_meta_funcs[i].tag4 :
335 ff_id3v2_extra_meta_funcs[i].tag3),
336 (isv34 ? 4 : 3)))
337 return &ff_id3v2_extra_meta_funcs[i];
338 i++;
339 }
340 return NULL;
341 }
342
343 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta)
344 {
345 int isv34, tlen, unsync;
346 char tag[5];
347 int64_t next, end = avio_tell(s->pb) + len;
348 int taghdrlen;
349 const char *reason = NULL;
350 AVIOContext pb;
351 AVIOContext *pbx;
352 unsigned char *buffer = NULL;
353 int buffer_size = 0;
354 const ID3v2EMFunc *extra_func;
355
356 switch (version) {
357 case 2:
358 if (flags & 0x40) {
359 reason = "compression";
360 goto error;
361 }
362 isv34 = 0;
363 taghdrlen = 6;
364 break;
365
366 case 3:
367 case 4:
368 isv34 = 1;
369 taghdrlen = 10;
370 break;
371
372 default:
373 reason = "version";
374 goto error;
375 }
376
377 unsync = flags & 0x80;
378
379 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
380 avio_skip(s->pb, get_size(s->pb, 4));
381
382 while (len >= taghdrlen) {
383 unsigned int tflags = 0;
384 int tunsync = 0;
385
386 if (isv34) {
387 avio_read(s->pb, tag, 4);
388 tag[4] = 0;
389 if(version==3){
390 tlen = avio_rb32(s->pb);
391 }else
392 tlen = get_size(s->pb, 4);
393 tflags = avio_rb16(s->pb);
394 tunsync = tflags & ID3v2_FLAG_UNSYNCH;
395 } else {
396 avio_read(s->pb, tag, 3);
397 tag[3] = 0;
398 tlen = avio_rb24(s->pb);
399 }
400 if (tlen < 0 || tlen > len - taghdrlen) {
401 av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag);
402 break;
403 }
404 len -= taghdrlen + tlen;
405 next = avio_tell(s->pb) + tlen;
406
407 if (!tlen) {
408 if (tag[0])
409 av_log(s, AV_LOG_DEBUG, "Invalid empty frame %s, skipping.\n", tag);
410 continue;
411 }
412
413 if (tflags & ID3v2_FLAG_DATALEN) {
414 avio_rb32(s->pb);
415 tlen -= 4;
416 }
417
418 if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
419 av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
420 avio_skip(s->pb, tlen);
421 /* check for text tag or supported special meta tag */
422 } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)))) {
423 if (unsync || tunsync) {
424 int i, j;
425 av_fast_malloc(&buffer, &buffer_size, tlen);
426 if (!buffer) {
427 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
428 goto seek;
429 }
430 for (i = 0, j = 0; i < tlen; i++, j++) {
431 buffer[j] = avio_r8(s->pb);
432 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
433 /* Unsynchronised byte, skip it */
434 j--;
435 }
436 }
437 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL);
438 tlen = j;
439 pbx = &pb; // read from sync buffer
440 } else {
441 pbx = s->pb; // read straight from input
442 }
443 if (tag[0] == 'T')
444 /* parse text tag */
445 read_ttag(s, pbx, tlen, tag);
446 else
447 /* parse special meta tag */
448 extra_func->read(s, pbx, tlen, tag, extra_meta);
449 }
450 else if (!tag[0]) {
451 if (tag[1])
452 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
453 avio_skip(s->pb, tlen);
454 break;
455 }
456 /* Skip to end of tag */
457 seek:
458 avio_seek(s->pb, next, SEEK_SET);
459 }
460
461 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
462 end += 10;
463
464 error:
465 if (reason)
466 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
467 avio_seek(s->pb, end, SEEK_SET);
468 av_free(buffer);
469 return;
470 }
471
472 void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta)
473 {
474 int len, ret;
475 uint8_t buf[ID3v2_HEADER_SIZE];
476 int found_header;
477 int64_t off;
478
479 do {
480 /* save the current offset in case there's nothing to read/skip */
481 off = avio_tell(s->pb);
482 ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE);
483 if (ret != ID3v2_HEADER_SIZE)
484 break;
485 found_header = ff_id3v2_match(buf, magic);
486 if (found_header) {
487 /* parse ID3v2 header */
488 len = ((buf[6] & 0x7f) << 21) |
489 ((buf[7] & 0x7f) << 14) |
490 ((buf[8] & 0x7f) << 7) |
491 (buf[9] & 0x7f);
492 ff_id3v2_parse(s, len, buf[3], buf[5], extra_meta);
493 } else {
494 avio_seek(s->pb, off, SEEK_SET);
495 }
496 } while (found_header);
497 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv);
498 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_2_metadata_conv);
499 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv);
500 merge_date(&s->metadata);
501 }
502
503 void ff_id3v2_read(AVFormatContext *s, const char *magic)
504 {
505 ff_id3v2_read_all(s, magic, NULL);
506 }
507
508 void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta)
509 {
510 ID3v2ExtraMeta *current = *extra_meta, *next;
511 const ID3v2EMFunc *extra_func;
512
513 while (current) {
514 if ((extra_func = get_extra_meta_func(current->tag, 1)))
515 extra_func->free(current->data);
516 next = current->next;
517 av_freep(&current);
518 current = next;
519 }
520 }
521
522 const ID3v2EMFunc ff_id3v2_extra_meta_funcs[] = {
523 { "GEO", "GEOB", read_geobtag, free_geobtag },
524 { NULL }
525 };
526
527 const AVMetadataConv ff_id3v2_34_metadata_conv[] = {
528 { "TALB", "album"},
529 { "TCOM", "composer"},
530 { "TCON", "genre"},
531 { "TCOP", "copyright"},
532 { "TENC", "encoded_by"},
533 { "TIT2", "title"},
534 { "TLAN", "language"},
535 { "TPE1", "artist"},
536 { "TPE2", "album_artist"},
537 { "TPE3", "performer"},
538 { "TPOS", "disc"},
539 { "TPUB", "publisher"},
540 { "TRCK", "track"},
541 { "TSSE", "encoder"},
542 { 0 }
543 };
544
545 const AVMetadataConv ff_id3v2_4_metadata_conv[] = {
546 { "TDRL", "date"},
547 { "TDRC", "date"},
548 { "TDEN", "creation_time"},
549 { "TSOA", "album-sort"},
550 { "TSOP", "artist-sort"},
551 { "TSOT", "title-sort"},
552 { 0 }
553 };
554
555 const AVMetadataConv ff_id3v2_2_metadata_conv[] = {
556 { "TAL", "album"},
557 { "TCO", "genre"},
558 { "TT2", "title"},
559 { "TEN", "encoded_by"},
560 { "TP1", "artist"},
561 { "TP2", "album_artist"},
562 { "TP3", "performer"},
563 { "TRK", "track"},
564 { 0 }
565 };
566
567
/**
 * List of 4-character text frame identifiers. Each row is exactly four
 * characters with no terminating NUL; the list ends with an all-zero row.
 */
const char ff_id3v2_tags[][4] = {
    "TALB", "TBPM", "TCOM", "TCON",
    "TCOP", "TDLY", "TENC", "TEXT",
    "TFLT", "TIT1", "TIT2", "TIT3",
    "TKEY", "TLAN", "TLEN", "TMED",
    "TOAL", "TOFN", "TOLY", "TOPE",
    "TOWN", "TPE1", "TPE2", "TPE3",
    "TPE4", "TPOS", "TPUB", "TRCK",
    "TRSN", "TRSO", "TSRC", "TSSE",
    { 0 },
};
575
/**
 * Second list of 4-character text frame identifiers, kept separate from
 * ff_id3v2_tags. Rows carry no terminating NUL; the list ends with an
 * all-zero row.
 */
const char ff_id3v2_4_tags[][4] = {
    "TDEN", "TDOR", "TDRC", "TDRL",
    "TDTG", "TIPL", "TMCL", "TMOO",
    "TPRO", "TSOA", "TSOP", "TSOT",
    "TSST",
    { 0 },
};
581
/**
 * Third list of 4-character text frame identifiers, kept separate from
 * ff_id3v2_tags. Rows carry no terminating NUL; the list ends with an
 * all-zero row.
 */
const char ff_id3v2_3_tags[][4] = {
    "TDAT", "TIME", "TORY",
    "TRDA", "TSIZ", "TYER",
    { 0 },
};