id3v2: use an enum for encodings instead of magic numbers.
[libav.git] / libavformat / id3v2.c
1 /*
2 * ID3v2 header parser
3 * Copyright (c) 2003 Fabrice Bellard
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "id3v2.h"
23 #include "id3v1.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/intreadwrite.h"
26 #include "metadata.h"
27
/**
 * Check whether buf starts with something that looks like an ID3v2 header.
 *
 * @param buf   at least 10 readable bytes (indices 0-4 and 6-9 are inspected)
 * @param magic three-character identifier the header must start with
 * @return 1 if all checks pass, 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf, const char * magic)
{
    int i;

    /* The first three bytes must equal the identifier. */
    for (i = 0; i < 3; i++) {
        if (buf[i] != magic[i])
            return 0;
    }

    /* Bytes 3 and 4 may never be 0xff. */
    if (buf[3] == 0xff)
        return 0;
    if (buf[4] == 0xff)
        return 0;

    /* The four size bytes only carry 7 bits each; the top bit must be clear. */
    for (i = 6; i <= 9; i++) {
        if (buf[i] & 0x80)
            return 0;
    }

    return 1;
}
40
41 int ff_id3v2_tag_len(const uint8_t * buf)
42 {
43 int len = ((buf[6] & 0x7f) << 21) +
44 ((buf[7] & 0x7f) << 14) +
45 ((buf[8] & 0x7f) << 7) +
46 (buf[9] & 0x7f) +
47 ID3v2_HEADER_SIZE;
48 if (buf[5] & 0x10)
49 len += ID3v2_HEADER_SIZE;
50 return len;
51 }
52
53 static unsigned int get_size(ByteIOContext *s, int len)
54 {
55 int v = 0;
56 while (len--)
57 v = (v << 7) + (get_byte(s) & 0x7F);
58 return v;
59 }
60
61 static void read_ttag(AVFormatContext *s, ByteIOContext *pb, int taglen, const char *key)
62 {
63 char *q, dst[512];
64 const char *val = NULL;
65 int len, dstlen = sizeof(dst) - 1;
66 unsigned genre;
67 unsigned int (*get)(ByteIOContext*) = get_be16;
68
69 dst[0] = 0;
70 if (taglen < 1)
71 return;
72
73 taglen--; /* account for encoding type byte */
74
75 switch (get_byte(pb)) { /* encoding type */
76
77 case ID3v2_ENCODING_ISO8859:
78 q = dst;
79 while (taglen-- && q - dst < dstlen - 7) {
80 uint8_t tmp;
81 PUT_UTF8(get_byte(pb), tmp, *q++ = tmp;)
82 }
83 *q = 0;
84 break;
85
86 case ID3v2_ENCODING_UTF16BOM:
87 taglen -= 2;
88 switch (get_be16(pb)) {
89 case 0xfffe:
90 get = get_le16;
91 case 0xfeff:
92 break;
93 default:
94 av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
95 return;
96 }
97 // fall-through
98
99 case ID3v2_ENCODING_UTF16BE:
100 q = dst;
101 while (taglen > 1 && q - dst < dstlen - 7) {
102 uint32_t ch;
103 uint8_t tmp;
104
105 GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;)
106 PUT_UTF8(ch, tmp, *q++ = tmp;)
107 }
108 *q = 0;
109 break;
110
111 case ID3v2_ENCODING_UTF8:
112 len = FFMIN(taglen, dstlen);
113 get_buffer(pb, dst, len);
114 dst[len] = 0;
115 break;
116 default:
117 av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key);
118 }
119
120 if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
121 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
122 && genre <= ID3v1_GENRE_MAX)
123 val = ff_id3v1_genre_str[genre];
124 else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
125 /* dst now contains two 0-terminated strings */
126 dst[dstlen] = 0;
127 len = strlen(dst);
128 key = dst;
129 val = dst + FFMIN(len + 1, dstlen);
130 }
131 else if (*dst)
132 val = dst;
133
134 if (val)
135 av_metadata_set2(&s->metadata, key, val, AV_METADATA_DONT_OVERWRITE);
136 }
137
138 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
139 {
140 int isv34, tlen, unsync;
141 char tag[5];
142 int64_t next;
143 int taghdrlen;
144 const char *reason;
145 ByteIOContext pb;
146 unsigned char *buffer = NULL;
147 int buffer_size = 0;
148
149 switch (version) {
150 case 2:
151 if (flags & 0x40) {
152 reason = "compression";
153 goto error;
154 }
155 isv34 = 0;
156 taghdrlen = 6;
157 break;
158
159 case 3:
160 case 4:
161 isv34 = 1;
162 taghdrlen = 10;
163 break;
164
165 default:
166 reason = "version";
167 goto error;
168 }
169
170 unsync = flags & 0x80;
171
172 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
173 url_fskip(s->pb, get_size(s->pb, 4));
174
175 while (len >= taghdrlen) {
176 unsigned int tflags;
177 int tunsync = 0;
178
179 if (isv34) {
180 get_buffer(s->pb, tag, 4);
181 tag[4] = 0;
182 if(version==3){
183 tlen = get_be32(s->pb);
184 }else
185 tlen = get_size(s->pb, 4);
186 tflags = get_be16(s->pb);
187 tunsync = tflags & ID3v2_FLAG_UNSYNCH;
188 } else {
189 get_buffer(s->pb, tag, 3);
190 tag[3] = 0;
191 tlen = get_be24(s->pb);
192 }
193 len -= taghdrlen + tlen;
194
195 if (len < 0)
196 break;
197
198 next = url_ftell(s->pb) + tlen;
199
200 if (tflags & ID3v2_FLAG_DATALEN) {
201 get_be32(s->pb);
202 tlen -= 4;
203 }
204
205 if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
206 av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
207 url_fskip(s->pb, tlen);
208 } else if (tag[0] == 'T') {
209 if (unsync || tunsync) {
210 int i, j;
211 av_fast_malloc(&buffer, &buffer_size, tlen);
212 for (i = 0, j = 0; i < tlen; i++, j++) {
213 buffer[j] = get_byte(s->pb);
214 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
215 /* Unsynchronised byte, skip it */
216 j--;
217 }
218 }
219 init_put_byte(&pb, buffer, j, 0, NULL, NULL, NULL, NULL);
220 read_ttag(s, &pb, j, tag);
221 } else {
222 read_ttag(s, s->pb, tlen, tag);
223 }
224 }
225 else if (!tag[0]) {
226 if (tag[1])
227 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
228 url_fskip(s->pb, tlen);
229 break;
230 }
231 /* Skip to end of tag */
232 url_fseek(s->pb, next, SEEK_SET);
233 }
234
235 if (len > 0) {
236 /* Skip padding */
237 url_fskip(s->pb, len);
238 }
239 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
240 url_fskip(s->pb, 10);
241
242 av_free(buffer);
243 return;
244
245 error:
246 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
247 url_fskip(s->pb, len);
248 av_free(buffer);
249 }
250
251 void ff_id3v2_read(AVFormatContext *s, const char *magic)
252 {
253 int len, ret;
254 uint8_t buf[ID3v2_HEADER_SIZE];
255 int found_header;
256 int64_t off;
257
258 do {
259 /* save the current offset in case there's nothing to read/skip */
260 off = url_ftell(s->pb);
261 ret = get_buffer(s->pb, buf, ID3v2_HEADER_SIZE);
262 if (ret != ID3v2_HEADER_SIZE)
263 break;
264 found_header = ff_id3v2_match(buf, magic);
265 if (found_header) {
266 /* parse ID3v2 header */
267 len = ((buf[6] & 0x7f) << 21) |
268 ((buf[7] & 0x7f) << 14) |
269 ((buf[8] & 0x7f) << 7) |
270 (buf[9] & 0x7f);
271 ff_id3v2_parse(s, len, buf[3], buf[5]);
272 } else {
273 url_fseek(s->pb, off, SEEK_SET);
274 }
275 } while (found_header);
276 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_metadata_conv);
277 }
278
279 const AVMetadataConv ff_id3v2_metadata_conv[] = {
280 { "TALB", "album"},
281 { "TAL", "album"},
282 { "TCOM", "composer"},
283 { "TCON", "genre"},
284 { "TCO", "genre"},
285 { "TCOP", "copyright"},
286 { "TDRL", "date"},
287 { "TDRC", "date"},
288 { "TDEN", "creation_time"},
289 { "TENC", "encoded_by"},
290 { "TEN", "encoded_by"},
291 { "TIT2", "title"},
292 { "TT2", "title"},
293 { "TLAN", "language"},
294 { "TPE1", "artist"},
295 { "TP1", "artist"},
296 { "TPE2", "album_artist"},
297 { "TP2", "album_artist"},
298 { "TPE3", "performer"},
299 { "TP3", "performer"},
300 { "TPOS", "disc"},
301 { "TPUB", "publisher"},
302 { "TRCK", "track"},
303 { "TRK", "track"},
304 { "TSOA", "album-sort"},
305 { "TSOP", "artist-sort"},
306 { "TSOT", "title-sort"},
307 { "TSSE", "encoder"},
308 { 0 }
309 };
310
/*
 * List of four-character text frame identifiers.
 * Entries are exactly 4 bytes wide and therefore NOT 0-terminated;
 * the list itself ends with an all-zero entry.
 */
const char ff_id3v2_tags[][4] = {
    "TALB",
    "TBPM",
    "TCOM", "TCON", "TCOP",
    "TDEN", "TDLY", "TDOR", "TDRC", "TDRL", "TDTG",
    "TENC", "TEXT",
    "TFLT",
    "TIPL", "TIT1", "TIT2", "TIT3",
    "TKEY",
    "TLAN", "TLEN",
    "TMCL", "TMED", "TMOO",
    "TOAL", "TOFN", "TOLY", "TOPE", "TOWN",
    "TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPRO", "TPUB",
    "TRCK", "TRSN", "TRSO",
    "TSOA", "TSOP", "TSOT", "TSRC", "TSSE", "TSST",
    { 0 },
};