Skip short padding in id3v2.
[libav.git] / libavformat / id3v2.c
1 /*
2 * ID3v2 header parser
3 * Copyright (c) 2003 Fabrice Bellard
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "id3v2.h"
23 #include "id3v1.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/intreadwrite.h"
26
/**
 * Check whether a 10-byte buffer looks like an ID3v2 header (or footer).
 *
 * @param buf   at least 10 readable bytes
 * @param magic 3-character signature to compare against buf[0..2]
 * @return 1 if the signature matches, neither version byte is 0xff and the
 *         four size bytes all have their top bit clear; 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf, const char * magic)
{
    int i;

    /* three-byte signature */
    for (i = 0; i < 3; i++) {
        if (buf[i] != magic[i])
            return 0;
    }

    /* major version and revision must never be 0xff */
    if (buf[3] == 0xff || buf[4] == 0xff)
        return 0;

    /* buf[5] holds the flags and is not validated here */

    /* the size is stored as four 7-bit groups: bit 7 must be clear in each */
    for (i = 6; i <= 9; i++) {
        if (buf[i] & 0x80)
            return 0;
    }

    return 1;
}
39
40 int ff_id3v2_tag_len(const uint8_t * buf)
41 {
42 int len = ((buf[6] & 0x7f) << 21) +
43 ((buf[7] & 0x7f) << 14) +
44 ((buf[8] & 0x7f) << 7) +
45 (buf[9] & 0x7f) +
46 ID3v2_HEADER_SIZE;
47 if (buf[5] & 0x10)
48 len += ID3v2_HEADER_SIZE;
49 return len;
50 }
51
52 void ff_id3v2_read(AVFormatContext *s, const char *magic)
53 {
54 int len, ret;
55 uint8_t buf[ID3v2_HEADER_SIZE];
56
57 ret = get_buffer(s->pb, buf, ID3v2_HEADER_SIZE);
58 if (ret != ID3v2_HEADER_SIZE)
59 return;
60 if (ff_id3v2_match(buf, magic)) {
61 /* parse ID3v2 header */
62 len = ((buf[6] & 0x7f) << 21) |
63 ((buf[7] & 0x7f) << 14) |
64 ((buf[8] & 0x7f) << 7) |
65 (buf[9] & 0x7f);
66 ff_id3v2_parse(s, len, buf[3], buf[5]);
67 } else {
68 url_fseek(s->pb, 0, SEEK_SET);
69 }
70 }
71
72 static unsigned int get_size(ByteIOContext *s, int len)
73 {
74 int v = 0;
75 while (len--)
76 v = (v << 7) + (get_byte(s) & 0x7F);
77 return v;
78 }
79
80 static void read_ttag(AVFormatContext *s, int taglen, const char *key)
81 {
82 char *q, dst[512];
83 const char *val = NULL;
84 int len, dstlen = sizeof(dst) - 1;
85 unsigned genre;
86 unsigned int (*get)(ByteIOContext*) = get_be16;
87
88 dst[0] = 0;
89 if (taglen < 1)
90 return;
91
92 taglen--; /* account for encoding type byte */
93
94 switch (get_byte(s->pb)) { /* encoding type */
95
96 case 0: /* ISO-8859-1 (0 - 255 maps directly into unicode) */
97 q = dst;
98 while (taglen-- && q - dst < dstlen - 7) {
99 uint8_t tmp;
100 PUT_UTF8(get_byte(s->pb), tmp, *q++ = tmp;)
101 }
102 *q = 0;
103 break;
104
105 case 1: /* UTF-16 with BOM */
106 taglen -= 2;
107 switch (get_be16(s->pb)) {
108 case 0xfffe:
109 get = get_le16;
110 case 0xfeff:
111 break;
112 default:
113 av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
114 return;
115 }
116 // fall-through
117
118 case 2: /* UTF-16BE without BOM */
119 q = dst;
120 while (taglen > 1 && q - dst < dstlen - 7) {
121 uint32_t ch;
122 uint8_t tmp;
123
124 GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(s->pb) : 0), break;)
125 PUT_UTF8(ch, tmp, *q++ = tmp;)
126 }
127 *q = 0;
128 break;
129
130 case 3: /* UTF-8 */
131 len = FFMIN(taglen, dstlen);
132 get_buffer(s->pb, dst, len);
133 dst[len] = 0;
134 break;
135 default:
136 av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key);
137 }
138
139 if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
140 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
141 && genre <= ID3v1_GENRE_MAX)
142 val = ff_id3v1_genre_str[genre];
143 else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
144 /* dst now contains two 0-terminated strings */
145 dst[dstlen] = 0;
146 len = strlen(dst);
147 key = dst;
148 val = dst + FFMIN(len + 1, dstlen);
149 }
150 else if (*dst)
151 val = dst;
152
153 if (val)
154 av_metadata_set2(&s->metadata, key, val, 0);
155 }
156
157 void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
158 {
159 int isv34, tlen;
160 char tag[5];
161 int64_t next;
162 int taghdrlen;
163 const char *reason;
164
165 switch (version) {
166 case 2:
167 if (flags & 0x40) {
168 reason = "compression";
169 goto error;
170 }
171 isv34 = 0;
172 taghdrlen = 6;
173 break;
174
175 case 3:
176 case 4:
177 isv34 = 1;
178 taghdrlen = 10;
179 break;
180
181 default:
182 reason = "version";
183 goto error;
184 }
185
186 if (flags & 0x80) {
187 reason = "unsynchronization";
188 goto error;
189 }
190
191 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
192 url_fskip(s->pb, get_size(s->pb, 4));
193
194 while (len >= taghdrlen) {
195 if (isv34) {
196 get_buffer(s->pb, tag, 4);
197 tag[4] = 0;
198 if(version==3){
199 tlen = get_be32(s->pb);
200 }else
201 tlen = get_size(s->pb, 4);
202 get_be16(s->pb); /* flags */
203 } else {
204 get_buffer(s->pb, tag, 3);
205 tag[3] = 0;
206 tlen = get_be24(s->pb);
207 }
208 len -= taghdrlen + tlen;
209
210 if (len < 0)
211 break;
212
213 next = url_ftell(s->pb) + tlen;
214
215 if (tag[0] == 'T')
216 read_ttag(s, tlen, tag);
217 else if (!tag[0]) {
218 if (tag[1])
219 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
220 url_fskip(s->pb, tlen);
221 break;
222 }
223 /* Skip to end of tag */
224 url_fseek(s->pb, next, SEEK_SET);
225 }
226
227 if (len > 0) {
228 /* Skip padding */
229 url_fskip(s->pb, len);
230 }
231 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
232 url_fskip(s->pb, 10);
233 return;
234
235 error:
236 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
237 url_fskip(s->pb, len);
238 }
239
240 const AVMetadataConv ff_id3v2_metadata_conv[] = {
241 { "TALB", "album"},
242 { "TAL", "album"},
243 { "TCOM", "composer"},
244 { "TCON", "genre"},
245 { "TCO", "genre"},
246 { "TCOP", "copyright"},
247 { "TDRL", "date"},
248 { "TDRC", "date"},
249 { "TENC", "encoded_by"},
250 { "TEN", "encoded_by"},
251 { "TIT2", "title"},
252 { "TT2", "title"},
253 { "TLAN", "language"},
254 { "TPE1", "artist"},
255 { "TP1", "artist"},
256 { "TPE2", "album_artist"},
257 { "TP2", "album_artist"},
258 { "TPE3", "performer"},
259 { "TP3", "performer"},
260 { "TPOS", "disc"},
261 { "TPUB", "publisher"},
262 { "TRCK", "track"},
263 { "TRK", "track"},
264 { "TSOA", "album-sort"},
265 { "TSOP", "artist-sort"},
266 { "TSOT", "title-sort"},
267 { "TSSE", "encoder"},
268 { 0 }
269 };
270
/*
 * List of four-character text frame identifiers.
 * Each entry fills its 4-byte slot exactly, so entries are NOT
 * 0-terminated strings. The list ends with an all-zero entry.
 */
const char ff_id3v2_tags[][4] = {
    /* TA.., TB.. */
    "TALB", "TBPM",
    /* TC.. */
    "TCOM", "TCON", "TCOP",
    /* TD.. */
    "TDEN", "TDLY", "TDOR", "TDRC", "TDRL", "TDTG",
    /* TE.., TF.. */
    "TENC", "TEXT", "TFLT",
    /* TI.. */
    "TIPL", "TIT1", "TIT2", "TIT3",
    /* TK.., TL.. */
    "TKEY", "TLAN", "TLEN",
    /* TM.. */
    "TMCL", "TMED", "TMOO",
    /* TO.. */
    "TOAL", "TOFN", "TOLY", "TOPE", "TOWN",
    /* TP.. */
    "TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPRO", "TPUB",
    /* TR.. */
    "TRCK", "TRSN", "TRSO",
    /* TS.. */
    "TSOA", "TSOP", "TSOT", "TSRC", "TSSE", "TSST",
    /* terminator */
    { 0 },
};