id3v2: use an enum for encodings instead of magic numbers.
[libav.git] / libavformat / id3v2.c
CommitLineData
2ea512a6
AC
/*
 * ID3v2 header parser
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "id3v2.h"
75411182
PD
23#include "id3v1.h"
24#include "libavutil/avstring.h"
3a1350e8 25#include "libavutil/intreadwrite.h"
03700d39 26#include "metadata.h"
2ea512a6 27
/**
 * Check whether a buffer looks like the start of an ID3v2 tag.
 *
 * @param buf   header bytes; indices 0..9 may be read
 * @param magic identifier whose first three characters are compared
 *              against buf[0..2]
 * @return 1 if the identifier matches, neither version byte (buf[3],
 *         buf[4]) is 0xff and none of the four size bytes (buf[6..9])
 *         has its top bit set; 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf, const char * magic)
{
    int i;

    /* three-character identifier */
    for (i = 0; i < 3; i++)
        if (buf[i] != magic[i])
            return 0;

    /* version bytes must not be 0xff */
    if (buf[3] == 0xff || buf[4] == 0xff)
        return 0;

    /* every size byte carries only 7 bits */
    for (i = 6; i <= 9; i++)
        if (buf[i] & 0x80)
            return 0;

    return 1;
}
ac3ef4a4
AC
40
41int ff_id3v2_tag_len(const uint8_t * buf)
42{
43 int len = ((buf[6] & 0x7f) << 21) +
7d7b8c32
DB
44 ((buf[7] & 0x7f) << 14) +
45 ((buf[8] & 0x7f) << 7) +
46 (buf[9] & 0x7f) +
47 ID3v2_HEADER_SIZE;
ac3ef4a4
AC
48 if (buf[5] & 0x10)
49 len += ID3v2_HEADER_SIZE;
50 return len;
51}
75411182
PD
52
53static unsigned int get_size(ByteIOContext *s, int len)
54{
7d7b8c32
DB
55 int v = 0;
56 while (len--)
57 v = (v << 7) + (get_byte(s) & 0x7F);
75411182
PD
58 return v;
59}
60
18bbe9df 61static void read_ttag(AVFormatContext *s, ByteIOContext *pb, int taglen, const char *key)
75411182
PD
62{
63 char *q, dst[512];
41770abf 64 const char *val = NULL;
75411182
PD
65 int len, dstlen = sizeof(dst) - 1;
66 unsigned genre;
20c68378 67 unsigned int (*get)(ByteIOContext*) = get_be16;
75411182 68
7d7b8c32
DB
69 dst[0] = 0;
70 if (taglen < 1)
75411182
PD
71 return;
72
73 taglen--; /* account for encoding type byte */
74
18bbe9df 75 switch (get_byte(pb)) { /* encoding type */
75411182 76
d66eff36 77 case ID3v2_ENCODING_ISO8859:
75411182 78 q = dst;
787f8fad 79 while (taglen-- && q - dst < dstlen - 7) {
75411182 80 uint8_t tmp;
18bbe9df 81 PUT_UTF8(get_byte(pb), tmp, *q++ = tmp;)
75411182 82 }
9aa1bcce 83 *q = 0;
75411182
PD
84 break;
85
d66eff36 86 case ID3v2_ENCODING_UTF16BOM:
20c68378 87 taglen -= 2;
18bbe9df 88 switch (get_be16(pb)) {
20c68378
AK
89 case 0xfffe:
90 get = get_le16;
91 case 0xfeff:
92 break;
93 default:
94 av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
95 return;
96 }
97 // fall-through
98
d66eff36 99 case ID3v2_ENCODING_UTF16BE:
20c68378
AK
100 q = dst;
101 while (taglen > 1 && q - dst < dstlen - 7) {
102 uint32_t ch;
103 uint8_t tmp;
104
18bbe9df 105 GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;)
20c68378
AK
106 PUT_UTF8(ch, tmp, *q++ = tmp;)
107 }
108 *q = 0;
109 break;
110
d66eff36 111 case ID3v2_ENCODING_UTF8:
037e9afd 112 len = FFMIN(taglen, dstlen);
18bbe9df 113 get_buffer(pb, dst, len);
75411182
PD
114 dst[len] = 0;
115 break;
20c68378
AK
116 default:
117 av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key);
75411182
PD
118 }
119
41770abf 120 if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
75411182
PD
121 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
122 && genre <= ID3v1_GENRE_MAX)
41770abf
AK
123 val = ff_id3v1_genre_str[genre];
124 else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
125 /* dst now contains two 0-terminated strings */
126 dst[dstlen] = 0;
127 len = strlen(dst);
128 key = dst;
129 val = dst + FFMIN(len + 1, dstlen);
130 }
131 else if (*dst)
132 val = dst;
75411182 133
41770abf 134 if (val)
75aded83 135 av_metadata_set2(&s->metadata, key, val, AV_METADATA_DONT_OVERWRITE);
75411182
PD
136}
137
46a2da76 138static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
75411182 139{
18bbe9df 140 int isv34, tlen, unsync;
41770abf 141 char tag[5];
75411182
PD
142 int64_t next;
143 int taghdrlen;
144 const char *reason;
18bbe9df
AK
145 ByteIOContext pb;
146 unsigned char *buffer = NULL;
147 int buffer_size = 0;
75411182 148
7d7b8c32 149 switch (version) {
75411182 150 case 2:
7d7b8c32 151 if (flags & 0x40) {
75411182
PD
152 reason = "compression";
153 goto error;
154 }
155 isv34 = 0;
156 taghdrlen = 6;
157 break;
158
159 case 3:
160 case 4:
161 isv34 = 1;
162 taghdrlen = 10;
163 break;
164
165 default:
166 reason = "version";
167 goto error;
168 }
169
18bbe9df 170 unsync = flags & 0x80;
75411182 171
7d7b8c32 172 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
75411182
PD
173 url_fskip(s->pb, get_size(s->pb, 4));
174
7d7b8c32 175 while (len >= taghdrlen) {
18bbe9df
AK
176 unsigned int tflags;
177 int tunsync = 0;
178
7d7b8c32 179 if (isv34) {
41770abf
AK
180 get_buffer(s->pb, tag, 4);
181 tag[4] = 0;
3fd5a75b 182 if(version==3){
d004179e 183 tlen = get_be32(s->pb);
3fd5a75b
MN
184 }else
185 tlen = get_size(s->pb, 4);
18bbe9df 186 tflags = get_be16(s->pb);
7a07d158 187 tunsync = tflags & ID3v2_FLAG_UNSYNCH;
75411182 188 } else {
41770abf
AK
189 get_buffer(s->pb, tag, 3);
190 tag[3] = 0;
1cd44221 191 tlen = get_be24(s->pb);
75411182
PD
192 }
193 len -= taghdrlen + tlen;
194
7d7b8c32 195 if (len < 0)
75411182
PD
196 break;
197
198 next = url_ftell(s->pb) + tlen;
199
a152c77f
AK
200 if (tflags & ID3v2_FLAG_DATALEN) {
201 get_be32(s->pb);
202 tlen -= 4;
203 }
204
407d3d5a
AK
205 if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
206 av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
207 url_fskip(s->pb, tlen);
208 } else if (tag[0] == 'T') {
18bbe9df
AK
209 if (unsync || tunsync) {
210 int i, j;
211 av_fast_malloc(&buffer, &buffer_size, tlen);
212 for (i = 0, j = 0; i < tlen; i++, j++) {
213 buffer[j] = get_byte(s->pb);
214 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
215 /* Unsynchronised byte, skip it */
216 j--;
217 }
218 }
219 init_put_byte(&pb, buffer, j, 0, NULL, NULL, NULL, NULL);
220 read_ttag(s, &pb, j, tag);
221 } else {
222 read_ttag(s, s->pb, tlen, tag);
223 }
224 }
2e3ca1ff
JM
225 else if (!tag[0]) {
226 if (tag[1])
227 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
ff58de29 228 url_fskip(s->pb, tlen);
2e3ca1ff
JM
229 break;
230 }
75411182
PD
231 /* Skip to end of tag */
232 url_fseek(s->pb, next, SEEK_SET);
233 }
234
ff58de29
AK
235 if (len > 0) {
236 /* Skip padding */
237 url_fskip(s->pb, len);
238 }
7d7b8c32 239 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
75411182 240 url_fskip(s->pb, 10);
18bbe9df
AK
241
242 av_free(buffer);
75411182
PD
243 return;
244
245 error:
246 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
247 url_fskip(s->pb, len);
18bbe9df 248 av_free(buffer);
75411182 249}
6378b062 250
46a2da76
AK
251void ff_id3v2_read(AVFormatContext *s, const char *magic)
252{
253 int len, ret;
254 uint8_t buf[ID3v2_HEADER_SIZE];
255 int found_header;
256 int64_t off;
257
258 do {
259 /* save the current offset in case there's nothing to read/skip */
260 off = url_ftell(s->pb);
261 ret = get_buffer(s->pb, buf, ID3v2_HEADER_SIZE);
262 if (ret != ID3v2_HEADER_SIZE)
f7fcd6a2 263 break;
46a2da76
AK
264 found_header = ff_id3v2_match(buf, magic);
265 if (found_header) {
266 /* parse ID3v2 header */
267 len = ((buf[6] & 0x7f) << 21) |
268 ((buf[7] & 0x7f) << 14) |
269 ((buf[8] & 0x7f) << 7) |
270 (buf[9] & 0x7f);
271 ff_id3v2_parse(s, len, buf[3], buf[5]);
272 } else {
273 url_fseek(s->pb, off, SEEK_SET);
274 }
275 } while (found_header);
f7fcd6a2 276 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_metadata_conv);
46a2da76
AK
277}
278
6378b062
AK
279const AVMetadataConv ff_id3v2_metadata_conv[] = {
280 { "TALB", "album"},
dfe9ee6b 281 { "TAL", "album"},
6378b062
AK
282 { "TCOM", "composer"},
283 { "TCON", "genre"},
dfe9ee6b 284 { "TCO", "genre"},
6378b062
AK
285 { "TCOP", "copyright"},
286 { "TDRL", "date"},
ca76a119 287 { "TDRC", "date"},
bcb5d217 288 { "TDEN", "creation_time"},
ca76a119
AK
289 { "TENC", "encoded_by"},
290 { "TEN", "encoded_by"},
6378b062 291 { "TIT2", "title"},
dfe9ee6b 292 { "TT2", "title"},
6378b062 293 { "TLAN", "language"},
8a98be1a 294 { "TPE1", "artist"},
dfe9ee6b 295 { "TP1", "artist"},
ca76a119
AK
296 { "TPE2", "album_artist"},
297 { "TP2", "album_artist"},
298 { "TPE3", "performer"},
299 { "TP3", "performer"},
6378b062
AK
300 { "TPOS", "disc"},
301 { "TPUB", "publisher"},
302 { "TRCK", "track"},
dfe9ee6b 303 { "TRK", "track"},
ca76a119
AK
304 { "TSOA", "album-sort"},
305 { "TSOP", "artist-sort"},
306 { "TSOT", "title-sort"},
307 { "TSSE", "encoder"},
6378b062
AK
308 { 0 }
309};
078d89a2
AK
310
/**
 * List of four-character ID3v2 text frame identifiers.
 * Each entry is exactly 4 bytes with no terminating 0; the list itself
 * ends with an all-zero entry.
 */
const char ff_id3v2_tags[][4] = {
    "TALB", "TBPM", "TCOM", "TCON", "TCOP",
    "TDEN", "TDLY", "TDOR", "TDRC", "TDRL",
    "TDTG", "TENC", "TEXT", "TFLT", "TIPL",
    "TIT1", "TIT2", "TIT3", "TKEY", "TLAN",
    "TLEN", "TMCL", "TMED", "TMOO", "TOAL",
    "TOFN", "TOLY", "TOPE", "TOWN", "TPE1",
    "TPE2", "TPE3", "TPE4", "TPOS", "TPRO",
    "TPUB", "TRCK", "TRSN", "TRSO", "TSOA",
    "TSOP", "TSOT", "TSRC", "TSSE", "TSST",
    { 0 },
};