Fix off-by-1 error in the tag parsing code.
[libav.git] / libavformat / id3v2.c
1 /*
2 * ID3v2 header parser
3 * Copyright (c) 2003 Fabrice Bellard
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "id3v2.h"
23 #include "id3v1.h"
24 #include "libavutil/avstring.h"
25
/**
 * Check whether a buffer starts with a plausible ID3v2 tag header.
 *
 * @param buf at least 10 readable bytes
 * @return 1 if the bytes look like an ID3v2 header, 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf)
{
    int i;

    /* Magic "ID3". */
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    /* Neither version byte may be 0xff. */
    if (buf[3] == 0xff || buf[4] == 0xff)
        return 0;

    /* The four size bytes must all have their top bit clear. */
    for (i = 6; i <= 9; i++)
        if (buf[i] & 0x80)
            return 0;

    return 1;
}
38
39 int ff_id3v2_tag_len(const uint8_t * buf)
40 {
41 int len = ((buf[6] & 0x7f) << 21) +
42 ((buf[7] & 0x7f) << 14) +
43 ((buf[8] & 0x7f) << 7) +
44 (buf[9] & 0x7f) +
45 ID3v2_HEADER_SIZE;
46 if (buf[5] & 0x10)
47 len += ID3v2_HEADER_SIZE;
48 return len;
49 }
50
51 void ff_id3v2_read(AVFormatContext *s)
52 {
53 int len, ret;
54 uint8_t buf[ID3v2_HEADER_SIZE];
55
56 ret = get_buffer(s->pb, buf, ID3v2_HEADER_SIZE);
57 if (ret != ID3v2_HEADER_SIZE)
58 return;
59 if (ff_id3v2_match(buf)) {
60 /* parse ID3v2 header */
61 len = ((buf[6] & 0x7f) << 21) |
62 ((buf[7] & 0x7f) << 14) |
63 ((buf[8] & 0x7f) << 7) |
64 (buf[9] & 0x7f);
65 ff_id3v2_parse(s, len, buf[3], buf[5]);
66 } else {
67 url_fseek(s->pb, 0, SEEK_SET);
68 }
69 }
70
71 static unsigned int get_size(ByteIOContext *s, int len)
72 {
73 int v = 0;
74 while (len--)
75 v = (v << 7) + (get_byte(s) & 0x7F);
76 return v;
77 }
78
79 static void read_ttag(AVFormatContext *s, int taglen, const char *key)
80 {
81 char *q, dst[512];
82 const char *val = NULL;
83 int len, dstlen = sizeof(dst) - 1;
84 unsigned genre;
85 unsigned int (*get)(ByteIOContext*) = get_be16;
86
87 dst[0] = 0;
88 if (taglen < 1)
89 return;
90
91 taglen--; /* account for encoding type byte */
92
93 switch (get_byte(s->pb)) { /* encoding type */
94
95 case 0: /* ISO-8859-1 (0 - 255 maps directly into unicode) */
96 q = dst;
97 while (taglen-- && q - dst < dstlen - 7) {
98 uint8_t tmp;
99 PUT_UTF8(get_byte(s->pb), tmp, *q++ = tmp;)
100 }
101 *q = 0;
102 break;
103
104 case 1: /* UTF-16 with BOM */
105 taglen -= 2;
106 switch (get_be16(s->pb)) {
107 case 0xfffe:
108 get = get_le16;
109 case 0xfeff:
110 break;
111 default:
112 av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
113 return;
114 }
115 // fall-through
116
117 case 2: /* UTF-16BE without BOM */
118 q = dst;
119 while (taglen > 1 && q - dst < dstlen - 7) {
120 uint32_t ch;
121 uint8_t tmp;
122
123 GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(s->pb) : 0), break;)
124 PUT_UTF8(ch, tmp, *q++ = tmp;)
125 }
126 *q = 0;
127 break;
128
129 case 3: /* UTF-8 */
130 len = FFMIN(taglen, dstlen);
131 get_buffer(s->pb, dst, len);
132 dst[len] = 0;
133 break;
134 default:
135 av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key);
136 }
137
138 if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
139 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
140 && genre <= ID3v1_GENRE_MAX)
141 val = ff_id3v1_genre_str[genre];
142 else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
143 /* dst now contains two 0-terminated strings */
144 dst[dstlen] = 0;
145 len = strlen(dst);
146 key = dst;
147 val = dst + FFMIN(len + 1, dstlen);
148 }
149 else if (*dst)
150 val = dst;
151
152 if (val)
153 av_metadata_set(&s->metadata, key, val);
154 }
155
156 void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
157 {
158 int isv34, tlen;
159 char tag[5];
160 int64_t next;
161 int taghdrlen;
162 const char *reason;
163
164 switch (version) {
165 case 2:
166 if (flags & 0x40) {
167 reason = "compression";
168 goto error;
169 }
170 isv34 = 0;
171 taghdrlen = 6;
172 break;
173
174 case 3:
175 case 4:
176 isv34 = 1;
177 taghdrlen = 10;
178 break;
179
180 default:
181 reason = "version";
182 goto error;
183 }
184
185 if (flags & 0x80) {
186 reason = "unsynchronization";
187 goto error;
188 }
189
190 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
191 url_fskip(s->pb, get_size(s->pb, 4));
192
193 while (len >= taghdrlen) {
194 if (isv34) {
195 get_buffer(s->pb, tag, 4);
196 tag[4] = 0;
197 if(version==3){
198 tlen = get_be32(s->pb);
199 }else
200 tlen = get_size(s->pb, 4);
201 get_be16(s->pb); /* flags */
202 } else {
203 get_buffer(s->pb, tag, 3);
204 tag[3] = 0;
205 tlen = get_be24(s->pb);
206 }
207 len -= taghdrlen + tlen;
208
209 if (len < 0)
210 break;
211
212 next = url_ftell(s->pb) + tlen;
213
214 if (tag[0] == 'T')
215 read_ttag(s, tlen, tag);
216 else if (!tag[0]) {
217 if (tag[1])
218 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
219 url_fskip(s->pb, len);
220 break;
221 }
222 /* Skip to end of tag */
223 url_fseek(s->pb, next, SEEK_SET);
224 }
225
226 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
227 url_fskip(s->pb, 10);
228 return;
229
230 error:
231 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
232 url_fskip(s->pb, len);
233 }
234
235 const AVMetadataConv ff_id3v2_metadata_conv[] = {
236 { "TALB", "album"},
237 { "TAL", "album"},
238 { "TCOM", "composer"},
239 { "TCON", "genre"},
240 { "TCO", "genre"},
241 { "TCOP", "copyright"},
242 { "TDRL", "date"},
243 { "TDRC", "date"},
244 { "TENC", "encoded_by"},
245 { "TEN", "encoded_by"},
246 { "TIT2", "title"},
247 { "TT2", "title"},
248 { "TLAN", "language"},
249 { "TPE1", "artist"},
250 { "TP1", "artist"},
251 { "TPE2", "album_artist"},
252 { "TP2", "album_artist"},
253 { "TPE3", "performer"},
254 { "TP3", "performer"},
255 { "TPOS", "disc"},
256 { "TPUB", "publisher"},
257 { "TRCK", "track"},
258 { "TRK", "track"},
259 { "TSOA", "album-sort"},
260 { "TSOP", "artist-sort"},
261 { "TSOT", "title-sort"},
262 { "TSSE", "encoder"},
263 { 0 }
264 };
265
/**
 * List of four-character text frame identifiers. Each row is exactly four
 * bytes, so the identifiers carry no NUL terminator; the list ends with an
 * all-zero row.
 */
const char ff_id3v2_tags[][4] = {
    "TALB", "TBPM", "TCOM", "TCON", "TCOP",
    "TDEN", "TDLY", "TDOR", "TDRC", "TDRL",
    "TDTG", "TENC", "TEXT", "TFLT", "TIPL",
    "TIT1", "TIT2", "TIT3", "TKEY", "TLAN",
    "TLEN", "TMCL", "TMED", "TMOO", "TOAL",
    "TOFN", "TOLY", "TOPE", "TOWN", "TPE1",
    "TPE2", "TPE3", "TPE4", "TPOS", "TPRO",
    "TPUB", "TRCK", "TRSN", "TRSO", "TSOA",
    "TSOP", "TSOT", "TSRC", "TSSE", "TSST",
    { 0 },
};