aacenc: Use exact values when quantizing, not fuzzy values.
[libav.git] / libavcodec / aaccoder.c
CommitLineData
78e65cd7
AC
1/*
2 * AAC coefficients encoder
3 * Copyright (C) 2008-2009 Konstantin Shishkov
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
ba87f080 23 * @file
78e65cd7
AC
24 * AAC coefficients encoder
25 */
26
27/***********************************
28 * TODOs:
29 * speedup quantizer selection
30 * add sane pulse detection
31 ***********************************/
32
33#include "avcodec.h"
34#include "put_bits.h"
35#include "aac.h"
36#include "aacenc.h"
37#include "aactab.h"
38
/**
 * Bits needed to code a codebook run value for long windows.
 * Indexed by the run length.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};

/**
 * Bits needed to code a codebook run value for short windows.
 * Indexed by the run length.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};

/**
 * Run value bit tables: entry 0 is the long-window table, entry 1 the
 * short-window table. The pointer array itself is constant as well, so the
 * whole lookup structure is read-only.
 */
static const uint8_t * const run_value_bits[2] = {
    run_value_bits_long, run_value_bits_short
};
55
56
57/**
58 * Quantize one coefficient.
59 * @return absolute value of the quantized coefficient
60 * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
61 */
62static av_always_inline int quant(float coef, const float Q)
63{
3d51be01
AC
64 float a = coef * Q;
65 return sqrtf(a * sqrtf(a)) + 0.4054;
78e65cd7
AC
66}
67
/**
 * Quantize a run of coefficients that were already raised to the 3/4 power.
 *
 * @param out       receives the quantized values, size entries
 * @param in        original coefficients, only consulted for their sign
 * @param scaled    |in|^(3/4) values, size entries
 * @param size      number of coefficients to process
 * @param Q34       quantizer scale applied to scaled[]
 * @param is_signed if nonzero, out[i] is negated wherever in[i] is negative;
 *                  otherwise out[] holds magnitudes only
 * @param maxval    upper clamp for the quantized magnitude
 */
static void quantize_bands(int *out, const float *in, const float *scaled,
                           int size, float Q34, int is_signed, int maxval)
{
    int i;
    double qc;
    for (i = 0; i < size; i++) {
        qc = scaled[i] * Q34;
        /* same 0.4054 rounding offset as quant(), clamped to maxval */
        out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
        if (is_signed && in[i] < 0.0f) {
            out[i] = -out[i];
        }
    }
}
81
99d61d34 82static void abs_pow34_v(float *out, const float *in, const int size)
78e65cd7
AC
83{
84#ifndef USE_REALLY_FULL_SEARCH
85 int i;
3d51be01
AC
86 for (i = 0; i < size; i++) {
87 float a = fabsf(in[i]);
88 out[i] = sqrtf(a * sqrtf(a));
89 }
78e65cd7
AC
90#endif /* USE_REALLY_FULL_SEARCH */
91}
92
78e65cd7
AC
/** per-codebook radix used when folding quantized values into a codebook index */
static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
/** per-codebook clamp for quantized magnitudes; also the index offset for signed codebooks */
static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
95
96/**
97 * Calculate rate distortion cost for quantizing with given codebook
98 *
99 * @return quantization distortion
100 */
508f092a
AC
101static float quantize_and_encode_band_cost(struct AACEncContext *s,
102 PutBitContext *pb, const float *in,
99d61d34
DB
103 const float *scaled, int size, int scale_idx,
104 int cb, const float lambda, const float uplim,
105 int *bits)
78e65cd7
AC
106{
107 const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
108 const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
109 const float CLIPPED_ESCAPE = 165140.0f*IQ;
110 int i, j, k;
111 float cost = 0;
112 const int dim = cb < FIRST_PAIR_BT ? 4 : 2;
113 int resbits = 0;
3d51be01 114 const float Q34 = sqrtf(Q * sqrtf(Q));
99d61d34 115 const int range = aac_cb_range[cb];
78e65cd7 116 const int maxval = aac_cb_maxval[cb];
4d986b71 117 int off;
78e65cd7 118
fd257dc4
AC
119 if (!cb) {
120 for (i = 0; i < size; i++)
0bd9aa44 121 cost += in[i]*in[i];
a5762c9b
AC
122 if (bits)
123 *bits = 0;
0bd9aa44 124 return cost * lambda;
78e65cd7 125 }
508f092a
AC
126 if (!scaled) {
127 abs_pow34_v(s->scoefs, in, size);
128 scaled = s->scoefs;
129 }
78e65cd7 130 quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
4d986b71
AC
131 if (IS_CODEBOOK_UNSIGNED(cb)) {
132 off = 0;
133 } else {
134 off = maxval;
135 }
fd257dc4 136 for (i = 0; i < size; i += dim) {
78e65cd7 137 const float *vec;
4d986b71
AC
138 int *quants = s->qcoefs + i;
139 int curidx = 0;
140 int curbits;
141 float rd = 0.0f;
142 for (j = 0; j < dim; j++) {
143 curidx *= range;
144 curidx += quants[j] + off;
145 }
99d61d34
DB
146 curbits = ff_aac_spectral_bits[cb-1][curidx];
147 vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
fd257dc4
AC
148 if (IS_CODEBOOK_UNSIGNED(cb)) {
149 for (k = 0; k < dim; k++) {
78e65cd7
AC
150 float t = fabsf(in[i+k]);
151 float di;
a71e9b62 152 if (vec[k] == 64.0f) { //FIXME: slow
78e65cd7
AC
153 if (t >= CLIPPED_ESCAPE) {
154 di = t - CLIPPED_ESCAPE;
155 curbits += 21;
fd257dc4 156 } else {
78e65cd7 157 int c = av_clip(quant(t, Q), 0, 8191);
9d4f6f10 158 di = t - c*cbrtf(c)*IQ;
78e65cd7
AC
159 curbits += av_log2(c)*2 - 4 + 1;
160 }
fd257dc4 161 } else {
78e65cd7
AC
162 di = t - vec[k]*IQ;
163 }
fd257dc4 164 if (vec[k] != 0.0f)
78e65cd7 165 curbits++;
0bd9aa44 166 rd += di*di;
78e65cd7 167 }
fd257dc4
AC
168 } else {
169 for (k = 0; k < dim; k++) {
78e65cd7 170 float di = in[i+k] - vec[k]*IQ;
0bd9aa44 171 rd += di*di;
78e65cd7
AC
172 }
173 }
4d986b71
AC
174 cost += rd * lambda + curbits;
175 resbits += curbits;
fd257dc4 176 if (cost >= uplim)
78e65cd7 177 return uplim;
508f092a 178 if (pb) {
4d986b71 179 put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
fd257dc4
AC
180 if (IS_CODEBOOK_UNSIGNED(cb))
181 for (j = 0; j < dim; j++)
4d986b71 182 if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
78e65cd7 183 put_bits(pb, 1, in[i+j] < 0.0f);
fd257dc4
AC
184 if (cb == ESC_BT) {
185 for (j = 0; j < 2; j++) {
4d986b71 186 if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
78e65cd7
AC
187 int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
188 int len = av_log2(coef);
189
190 put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
191 put_bits(pb, len, coef & ((1 << len) - 1));
192 }
193 }
194 }
508f092a 195 }
78e65cd7 196 }
508f092a
AC
197
198 if (bits)
199 *bits = resbits;
200 return cost;
201}
/**
 * Measure the rate distortion cost of a band without writing any bits.
 *
 * Forwards all arguments to quantize_and_encode_band_cost() with a NULL
 * bitstream writer and returns its result unchanged.
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits)
{
    return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
                                         cb, lambda, uplim, bits);
}
210
211static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
212 const float *in, int size, int scale_idx,
213 int cb, const float lambda)
214{
215 quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
216 INFINITY, NULL);
78e65cd7
AC
217}
218
/**
 * structure used in optimal codebook search
 */
typedef struct BandCodingPath {
    int prev_idx; ///< pointer to the previous path point
    float cost;   ///< path cost
    int run;      ///< number of consecutive bands on this path using the same codebook
} BandCodingPath;
227
228/**
229 * Encode band info for single window group bands.
230 */
231static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
232 int win, int group_len, const float lambda)
233{
234 BandCodingPath path[120][12];
235 int w, swb, cb, start, start2, size;
236 int i, j;
99d61d34 237 const int max_sfb = sce->ics.max_sfb;
78e65cd7 238 const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
99d61d34 239 const int run_esc = (1 << run_bits) - 1;
78e65cd7
AC
240 int idx, ppos, count;
241 int stackrun[120], stackcb[120], stack_len;
242 float next_minrd = INFINITY;
243 int next_mincb = 0;
244
245 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
246 start = win*128;
fd257dc4 247 for (cb = 0; cb < 12; cb++) {
99d61d34 248 path[0][cb].cost = 0.0f;
78e65cd7 249 path[0][cb].prev_idx = -1;
99d61d34 250 path[0][cb].run = 0;
78e65cd7 251 }
fd257dc4 252 for (swb = 0; swb < max_sfb; swb++) {
78e65cd7
AC
253 start2 = start;
254 size = sce->ics.swb_sizes[swb];
fd257dc4
AC
255 if (sce->zeroes[win*16 + swb]) {
256 for (cb = 0; cb < 12; cb++) {
78e65cd7 257 path[swb+1][cb].prev_idx = cb;
99d61d34
DB
258 path[swb+1][cb].cost = path[swb][cb].cost;
259 path[swb+1][cb].run = path[swb][cb].run + 1;
78e65cd7 260 }
fd257dc4 261 } else {
78e65cd7
AC
262 float minrd = next_minrd;
263 int mincb = next_mincb;
264 next_minrd = INFINITY;
265 next_mincb = 0;
fd257dc4 266 for (cb = 0; cb < 12; cb++) {
78e65cd7
AC
267 float cost_stay_here, cost_get_here;
268 float rd = 0.0f;
fd257dc4 269 for (w = 0; w < group_len; w++) {
78e65cd7
AC
270 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
271 rd += quantize_band_cost(s, sce->coeffs + start + w*128,
272 s->scoefs + start + w*128, size,
273 sce->sf_idx[(win+w)*16+swb], cb,
274 lambda / band->threshold, INFINITY, NULL);
275 }
276 cost_stay_here = path[swb][cb].cost + rd;
277 cost_get_here = minrd + rd + run_bits + 4;
fd257dc4 278 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
99d61d34 279 != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
78e65cd7
AC
280 cost_stay_here += run_bits;
281 if (cost_get_here < cost_stay_here) {
282 path[swb+1][cb].prev_idx = mincb;
283 path[swb+1][cb].cost = cost_get_here;
284 path[swb+1][cb].run = 1;
285 } else {
286 path[swb+1][cb].prev_idx = cb;
287 path[swb+1][cb].cost = cost_stay_here;
288 path[swb+1][cb].run = path[swb][cb].run + 1;
289 }
290 if (path[swb+1][cb].cost < next_minrd) {
291 next_minrd = path[swb+1][cb].cost;
292 next_mincb = cb;
293 }
294 }
295 }
296 start += sce->ics.swb_sizes[swb];
297 }
298
299 //convert resulting path from backward-linked list
300 stack_len = 0;
99d61d34 301 idx = 0;
c8f47d8b 302 for (cb = 1; cb < 12; cb++)
fd257dc4 303 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
78e65cd7 304 idx = cb;
78e65cd7 305 ppos = max_sfb;
99d61d34 306 while (ppos > 0) {
78e65cd7
AC
307 cb = idx;
308 stackrun[stack_len] = path[ppos][cb].run;
309 stackcb [stack_len] = cb;
310 idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
311 ppos -= path[ppos][cb].run;
312 stack_len++;
313 }
314 //perform actual band info encoding
315 start = 0;
fd257dc4 316 for (i = stack_len - 1; i >= 0; i--) {
78e65cd7
AC
317 put_bits(&s->pb, 4, stackcb[i]);
318 count = stackrun[i];
319 memset(sce->zeroes + win*16 + start, !stackcb[i], count);
320 //XXX: memset when band_type is also uint8_t
fd257dc4 321 for (j = 0; j < count; j++) {
78e65cd7
AC
322 sce->band_type[win*16 + start] = stackcb[i];
323 start++;
324 }
99d61d34 325 while (count >= run_esc) {
78e65cd7
AC
326 put_bits(&s->pb, run_bits, run_esc);
327 count -= run_esc;
328 }
329 put_bits(&s->pb, run_bits, count);
330 }
331}
332
78e65cd7
AC
/**
 * One node of the scalefactor trellis used by search_for_quantizers_anmr().
 */
typedef struct TrellisPath {
    float cost;  ///< accumulated cost of the best path reaching this node
    int prev;    ///< state index of the predecessor node on that path
    int min_val; ///< smallest state index used along that path
    int max_val; ///< largest state index used along that path
} TrellisPath;

/** number of trellis stages: one initial stage plus one per coded band */
#define TRELLIS_STAGES 121
/** number of states per stage: one per possible scalefactor index (0..255) */
#define TRELLIS_STATES 256
342
78e65cd7 343static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
99d61d34
DB
344 SingleChannelElement *sce,
345 const float lambda)
78e65cd7
AC
346{
347 int q, w, w2, g, start = 0;
9072c29e 348 int i, j;
78e65cd7 349 int idx;
f5e82fec
AC
350 TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
351 int bandaddr[TRELLIS_STAGES];
78e65cd7
AC
352 int minq;
353 float mincost;
354
f5e82fec 355 for (i = 0; i < TRELLIS_STATES; i++) {
9072c29e
AC
356 paths[0][i].cost = 0.0f;
357 paths[0][i].prev = -1;
358 paths[0][i].min_val = i;
359 paths[0][i].max_val = i;
78e65cd7 360 }
f5e82fec
AC
361 for (j = 1; j < TRELLIS_STAGES; j++) {
362 for (i = 0; i < TRELLIS_STATES; i++) {
9072c29e
AC
363 paths[j][i].cost = INFINITY;
364 paths[j][i].prev = -2;
365 paths[j][i].min_val = INT_MAX;
366 paths[j][i].max_val = 0;
367 }
78e65cd7 368 }
9072c29e 369 idx = 1;
78e65cd7 370 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
fd257dc4 371 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
78e65cd7 372 start = w*128;
fd257dc4 373 for (g = 0; g < sce->ics.num_swb; g++) {
78e65cd7
AC
374 const float *coefs = sce->coeffs + start;
375 float qmin, qmax;
376 int nz = 0;
377
9072c29e 378 bandaddr[idx] = w * 16 + g;
78e65cd7
AC
379 qmin = INT_MAX;
380 qmax = 0.0f;
fd257dc4 381 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
78e65cd7 382 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
fd257dc4 383 if (band->energy <= band->threshold || band->threshold == 0.0f) {
78e65cd7
AC
384 sce->zeroes[(w+w2)*16+g] = 1;
385 continue;
386 }
387 sce->zeroes[(w+w2)*16+g] = 0;
388 nz = 1;
fd257dc4 389 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
78e65cd7 390 float t = fabsf(coefs[w2*128+i]);
c8f47d8b 391 if (t > 0.0f)
988c1705
AC
392 qmin = FFMIN(qmin, t);
393 qmax = FFMAX(qmax, t);
78e65cd7
AC
394 }
395 }
fd257dc4 396 if (nz) {
78e65cd7
AC
397 int minscale, maxscale;
398 float minrd = INFINITY;
399 //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
400 minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
401 //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
402 maxscale = av_clip_uint8(log2(qmax)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
fd257dc4 403 for (q = minscale; q < maxscale; q++) {
78e65cd7
AC
404 float dists[12], dist;
405 memset(dists, 0, sizeof(dists));
fd257dc4 406 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
78e65cd7
AC
407 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
408 int cb;
c8f47d8b 409 for (cb = 0; cb <= ESC_BT; cb++)
78e65cd7
AC
410 dists[cb] += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
411 q, cb, lambda / band->threshold, INFINITY, NULL);
78e65cd7
AC
412 }
413 dist = dists[0];
fd257dc4 414 for (i = 1; i <= ESC_BT; i++)
988c1705
AC
415 dist = FFMIN(dist, dists[i]);
416 minrd = FFMIN(minrd, dist);
78e65cd7 417
f5e82fec 418 for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
78e65cd7
AC
419 float cost;
420 int minv, maxv;
9072c29e 421 if (isinf(paths[idx - 1][i].cost))
78e65cd7 422 continue;
9072c29e 423 cost = paths[idx - 1][i].cost + dist
78e65cd7 424 + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
9072c29e
AC
425 minv = FFMIN(paths[idx - 1][i].min_val, q);
426 maxv = FFMAX(paths[idx - 1][i].max_val, q);
427 if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
428 paths[idx][q].cost = cost;
429 paths[idx][q].prev = i;
430 paths[idx][q].min_val = minv;
431 paths[idx][q].max_val = maxv;
78e65cd7
AC
432 }
433 }
434 }
fd257dc4 435 } else {
f5e82fec 436 for (q = 0; q < TRELLIS_STATES; q++) {
9072c29e
AC
437 if (!isinf(paths[idx - 1][q].cost)) {
438 paths[idx][q].cost = paths[idx - 1][q].cost + 1;
439 paths[idx][q].prev = q;
440 paths[idx][q].min_val = FFMIN(paths[idx - 1][q].min_val, q);
441 paths[idx][q].max_val = FFMAX(paths[idx - 1][q].max_val, q);
78e65cd7
AC
442 continue;
443 }
f5e82fec 444 for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
78e65cd7
AC
445 float cost;
446 int minv, maxv;
9072c29e 447 if (isinf(paths[idx - 1][i].cost))
78e65cd7 448 continue;
9072c29e
AC
449 cost = paths[idx - 1][i].cost + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
450 minv = FFMIN(paths[idx - 1][i].min_val, q);
451 maxv = FFMAX(paths[idx - 1][i].max_val, q);
452 if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
453 paths[idx][q].cost = cost;
454 paths[idx][q].prev = i;
455 paths[idx][q].min_val = minv;
456 paths[idx][q].max_val = maxv;
78e65cd7
AC
457 }
458 }
459 }
460 }
461 sce->zeroes[w*16+g] = !nz;
462 start += sce->ics.swb_sizes[g];
9072c29e 463 idx++;
78e65cd7
AC
464 }
465 }
9072c29e
AC
466 idx--;
467 mincost = paths[idx][0].cost;
468 minq = 0;
f5e82fec 469 for (i = 1; i < TRELLIS_STATES; i++) {
9072c29e
AC
470 if (paths[idx][i].cost < mincost) {
471 mincost = paths[idx][i].cost;
472 minq = i;
78e65cd7
AC
473 }
474 }
9072c29e
AC
475 while (idx) {
476 sce->sf_idx[bandaddr[idx]] = minq;
477 minq = paths[idx][minq].prev;
478 idx--;
78e65cd7
AC
479 }
480 //set the same quantizers inside window groups
fd257dc4
AC
481 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
482 for (g = 0; g < sce->ics.num_swb; g++)
483 for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
78e65cd7
AC
484 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
485}
486
487/**
488 * two-loop quantizers search taken from ISO 13818-7 Appendix C
489 */
99d61d34
DB
490static void search_for_quantizers_twoloop(AVCodecContext *avctx,
491 AACEncContext *s,
492 SingleChannelElement *sce,
493 const float lambda)
78e65cd7
AC
494{
495 int start = 0, i, w, w2, g;
496 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
497 float dists[128], uplims[128];
498 int fflag, minscaler;
99d61d34 499 int its = 0;
78e65cd7
AC
500 int allz = 0;
501 float minthr = INFINITY;
502
503 //XXX: some heuristic to determine initial quantizers will reduce search time
504 memset(dists, 0, sizeof(dists));
505 //determine zero bands and upper limits
fd257dc4
AC
506 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
507 for (g = 0; g < sce->ics.num_swb; g++) {
78e65cd7
AC
508 int nz = 0;
509 float uplim = 0.0f;
fd257dc4 510 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
78e65cd7
AC
511 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
512 uplim += band->threshold;
fd257dc4 513 if (band->energy <= band->threshold || band->threshold == 0.0f) {
78e65cd7
AC
514 sce->zeroes[(w+w2)*16+g] = 1;
515 continue;
516 }
517 nz = 1;
518 }
519 uplims[w*16+g] = uplim *512;
520 sce->zeroes[w*16+g] = !nz;
fd257dc4 521 if (nz)
988c1705 522 minthr = FFMIN(minthr, uplim);
78e65cd7
AC
523 allz = FFMAX(allz, nz);
524 }
525 }
fd257dc4
AC
526 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
527 for (g = 0; g < sce->ics.num_swb; g++) {
528 if (sce->zeroes[w*16+g]) {
78e65cd7
AC
529 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
530 continue;
531 }
988c1705 532 sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2(uplims[w*16+g]/minthr)*4,59);
78e65cd7
AC
533 }
534 }
535
fd257dc4 536 if (!allz)
78e65cd7
AC
537 return;
538 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
539 //perform two-loop search
540 //outer loop - improve quality
99d61d34 541 do {
78e65cd7
AC
542 int tbits, qstep;
543 minscaler = sce->sf_idx[0];
544 //inner loop - quantize spectrum to fit into given number of bits
545 qstep = its ? 1 : 32;
99d61d34 546 do {
78e65cd7
AC
547 int prev = -1;
548 tbits = 0;
549 fflag = 0;
fd257dc4 550 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
78e65cd7 551 start = w*128;
fd257dc4 552 for (g = 0; g < sce->ics.num_swb; g++) {
78e65cd7
AC
553 const float *coefs = sce->coeffs + start;
554 const float *scaled = s->scoefs + start;
555 int bits = 0;
556 int cb;
557 float mindist = INFINITY;
558 int minbits = 0;
559
a62d6cfe
AC
560 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
561 start += sce->ics.swb_sizes[g];
78e65cd7 562 continue;
a62d6cfe 563 }
78e65cd7 564 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
51cbabc7 565 {
78e65cd7
AC
566 float dist = 0.0f;
567 int bb = 0;
51cbabc7
AC
568 float maxval = 0.0f;
569 float Q = ff_aac_pow2sf_tab[200 - sce->sf_idx[w*16+g] + SCALE_ONE_POS - SCALE_DIV_512];
570 float Q34 = sqrtf(Q * sqrtf(Q));
571 int qmaxval;
572 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
573 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
574 maxval = fmaxf(maxval, scaled[w2*128+i]);
575 }
576 }
577 qmaxval = maxval * Q34 + 0.4054;
578 if (qmaxval == 0) cb = 0;
579 else if (qmaxval == 1) cb = 1;
580 else if (qmaxval == 2) cb = 3;
581 else if (qmaxval <= 4) cb = 5;
582 else if (qmaxval <= 7) cb = 7;
583 else if (qmaxval <= 12) cb = 9;
584 else cb = 11;
fd257dc4 585 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
78e65cd7
AC
586 int b;
587 dist += quantize_band_cost(s, coefs + w2*128,
588 scaled + w2*128,
589 sce->ics.swb_sizes[g],
590 sce->sf_idx[w*16+g],
01939922 591 cb,
7a4eebcd 592 lambda,
78e65cd7
AC
593 INFINITY,
594 &b);
595 bb += b;
596 }
78e65cd7
AC
597 mindist = dist;
598 minbits = bb;
78e65cd7 599 }
7a4eebcd 600 dists[w*16+g] = (mindist - minbits) / lambda;
78e65cd7 601 bits = minbits;
fd257dc4 602 if (prev != -1) {
78e65cd7
AC
603 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
604 }
605 tbits += bits;
606 start += sce->ics.swb_sizes[g];
607 prev = sce->sf_idx[w*16+g];
608 }
609 }
fd257dc4 610 if (tbits > destbits) {
c8f47d8b
DB
611 for (i = 0; i < 128; i++)
612 if (sce->sf_idx[i] < 218 - qstep)
78e65cd7 613 sce->sf_idx[i] += qstep;
fd257dc4 614 } else {
c8f47d8b
DB
615 for (i = 0; i < 128; i++)
616 if (sce->sf_idx[i] > 60 - qstep)
78e65cd7 617 sce->sf_idx[i] -= qstep;
78e65cd7
AC
618 }
619 qstep >>= 1;
fd257dc4 620 if (!qstep && tbits > destbits*1.02)
78e65cd7 621 qstep = 1;
c8f47d8b
DB
622 if (sce->sf_idx[0] >= 217)
623 break;
99d61d34 624 } while (qstep);
78e65cd7
AC
625
626 fflag = 0;
627 minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
fd257dc4 628 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
78e65cd7 629 start = w*128;
fd257dc4 630 for (g = 0; g < sce->ics.num_swb; g++) {
78e65cd7 631 int prevsc = sce->sf_idx[w*16+g];
fd257dc4 632 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60)
78e65cd7
AC
633 sce->sf_idx[w*16+g]--;
634 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
635 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
fd257dc4 636 if (sce->sf_idx[w*16+g] != prevsc)
78e65cd7
AC
637 fflag = 1;
638 }
639 }
640 its++;
99d61d34 641 } while (fflag && its < 10);
78e65cd7
AC
642}
643
644static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
99d61d34
DB
645 SingleChannelElement *sce,
646 const float lambda)
78e65cd7
AC
647{
648 int start = 0, i, w, w2, g;
649 float uplim[128], maxq[128];
650 int minq, maxsf;
651 float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
652 int last = 0, lastband = 0, curband = 0;
653 float avg_energy = 0.0;
fd257dc4 654 if (sce->ics.num_windows == 1) {
78e65cd7 655 start = 0;
fd257dc4
AC
656 for (i = 0; i < 1024; i++) {
657 if (i - start >= sce->ics.swb_sizes[curband]) {
78e65cd7
AC
658 start += sce->ics.swb_sizes[curband];
659 curband++;
660 }
fd257dc4 661 if (sce->coeffs[i]) {
78e65cd7
AC
662 avg_energy += sce->coeffs[i] * sce->coeffs[i];
663 last = i;
664 lastband = curband;
665 }
666 }
fd257dc4
AC
667 } else {
668 for (w = 0; w < 8; w++) {
78e65cd7
AC
669 const float *coeffs = sce->coeffs + w*128;
670 start = 0;
fd257dc4
AC
671 for (i = 0; i < 128; i++) {
672 if (i - start >= sce->ics.swb_sizes[curband]) {
78e65cd7
AC
673 start += sce->ics.swb_sizes[curband];
674 curband++;
675 }
fd257dc4 676 if (coeffs[i]) {
78e65cd7
AC
677 avg_energy += coeffs[i] * coeffs[i];
678 last = FFMAX(last, i);
679 lastband = FFMAX(lastband, curband);
680 }
681 }
682 }
683 }
684 last++;
685 avg_energy /= last;
fd257dc4
AC
686 if (avg_energy == 0.0f) {
687 for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
78e65cd7
AC
688 sce->sf_idx[i] = SCALE_ONE_POS;
689 return;
690 }
fd257dc4 691 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
78e65cd7 692 start = w*128;
fd257dc4 693 for (g = 0; g < sce->ics.num_swb; g++) {
99d61d34 694 float *coefs = sce->coeffs + start;
78e65cd7
AC
695 const int size = sce->ics.swb_sizes[g];
696 int start2 = start, end2 = start + size, peakpos = start;
697 float maxval = -1, thr = 0.0f, t;
698 maxq[w*16+g] = 0.0f;
fd257dc4 699 if (g > lastband) {
78e65cd7
AC
700 maxq[w*16+g] = 0.0f;
701 start += size;
fd257dc4 702 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
78e65cd7
AC
703 memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
704 continue;
705 }
fd257dc4
AC
706 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
707 for (i = 0; i < size; i++) {
78e65cd7 708 float t = coefs[w2*128+i]*coefs[w2*128+i];
988c1705 709 maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
78e65cd7 710 thr += t;
fd257dc4 711 if (sce->ics.num_windows == 1 && maxval < t) {
99d61d34 712 maxval = t;
78e65cd7
AC
713 peakpos = start+i;
714 }
715 }
716 }
fd257dc4 717 if (sce->ics.num_windows == 1) {
78e65cd7
AC
718 start2 = FFMAX(peakpos - 2, start2);
719 end2 = FFMIN(peakpos + 3, end2);
fd257dc4 720 } else {
78e65cd7
AC
721 start2 -= start;
722 end2 -= start;
723 }
724 start += size;
725 thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
99d61d34 726 t = 1.0 - (1.0 * start2 / last);
78e65cd7
AC
727 uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
728 }
729 }
730 memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
731 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
fd257dc4 732 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
78e65cd7 733 start = w*128;
fd257dc4 734 for (g = 0; g < sce->ics.num_swb; g++) {
99d61d34
DB
735 const float *coefs = sce->coeffs + start;
736 const float *scaled = s->scoefs + start;
737 const int size = sce->ics.swb_sizes[g];
78e65cd7 738 int scf, prev_scf, step;
32fa7725 739 int min_scf = -1, max_scf = 256;
78e65cd7 740 float curdiff;
fd257dc4 741 if (maxq[w*16+g] < 21.544) {
78e65cd7
AC
742 sce->zeroes[w*16+g] = 1;
743 start += size;
744 continue;
745 }
746 sce->zeroes[w*16+g] = 0;
99d61d34 747 scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2(1/maxq[w*16+g])*16/3, 60, 218);
78e65cd7 748 step = 16;
fd257dc4 749 for (;;) {
78e65cd7
AC
750 float dist = 0.0f;
751 int quant_max;
752
fd257dc4 753 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
78e65cd7
AC
754 int b;
755 dist += quantize_band_cost(s, coefs + w2*128,
756 scaled + w2*128,
757 sce->ics.swb_sizes[g],
758 scf,
759 ESC_BT,
7a4eebcd 760 lambda,
78e65cd7
AC
761 INFINITY,
762 &b);
763 dist -= b;
764 }
7a4eebcd 765 dist *= 1.0f / 512.0f / lambda;
78e65cd7 766 quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
fd257dc4 767 if (quant_max >= 8191) { // too much, return to the previous quantizer
78e65cd7
AC
768 sce->sf_idx[w*16+g] = prev_scf;
769 break;
770 }
771 prev_scf = scf;
772 curdiff = fabsf(dist - uplim[w*16+g]);
32fa7725 773 if (curdiff <= 1.0f)
78e65cd7
AC
774 step = 0;
775 else
32fa7725 776 step = log2(curdiff);
fd257dc4 777 if (dist > uplim[w*16+g])
78e65cd7 778 step = -step;
32fa7725 779 scf += step;
46174079 780 scf = av_clip_uint8(scf);
32fa7725 781 step = scf - prev_scf;
fd257dc4 782 if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
32fa7725 783 sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
78e65cd7
AC
784 break;
785 }
fd257dc4 786 if (step > 0)
32fa7725 787 min_scf = prev_scf;
78e65cd7 788 else
32fa7725 789 max_scf = prev_scf;
78e65cd7
AC
790 }
791 start += size;
792 }
793 }
794 minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
fd257dc4
AC
795 for (i = 1; i < 128; i++) {
796 if (!sce->sf_idx[i])
78e65cd7
AC
797 sce->sf_idx[i] = sce->sf_idx[i-1];
798 else
799 minq = FFMIN(minq, sce->sf_idx[i]);
800 }
c8f47d8b
DB
801 if (minq == INT_MAX)
802 minq = 0;
78e65cd7
AC
803 minq = FFMIN(minq, SCALE_MAX_POS);
804 maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
fd257dc4
AC
805 for (i = 126; i >= 0; i--) {
806 if (!sce->sf_idx[i])
78e65cd7
AC
807 sce->sf_idx[i] = sce->sf_idx[i+1];
808 sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
809 }
810}
811
812static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
99d61d34
DB
813 SingleChannelElement *sce,
814 const float lambda)
78e65cd7
AC
815{
816 int start = 0, i, w, w2, g;
817 int minq = 255;
818
819 memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
fd257dc4 820 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
78e65cd7 821 start = w*128;
fd257dc4
AC
822 for (g = 0; g < sce->ics.num_swb; g++) {
823 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
78e65cd7 824 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
fd257dc4 825 if (band->energy <= band->threshold) {
78e65cd7
AC
826 sce->sf_idx[(w+w2)*16+g] = 218;
827 sce->zeroes[(w+w2)*16+g] = 1;
fd257dc4 828 } else {
78e65cd7
AC
829 sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2(band->threshold), 80, 218);
830 sce->zeroes[(w+w2)*16+g] = 0;
831 }
832 minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
833 }
834 }
835 }
fd257dc4 836 for (i = 0; i < 128; i++) {
c8f47d8b
DB
837 sce->sf_idx[i] = 140;
838 //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
78e65cd7
AC
839 }
840 //set the same quantizers inside window groups
fd257dc4
AC
841 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
842 for (g = 0; g < sce->ics.num_swb; g++)
843 for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
78e65cd7
AC
844 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
845}
846
99d61d34
DB
847static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
848 const float lambda)
78e65cd7
AC
849{
850 int start = 0, i, w, w2, g;
851 float M[128], S[128];
852 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
853 SingleChannelElement *sce0 = &cpe->ch[0];
854 SingleChannelElement *sce1 = &cpe->ch[1];
fd257dc4 855 if (!cpe->common_window)
78e65cd7 856 return;
fd257dc4
AC
857 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
858 for (g = 0; g < sce0->ics.num_swb; g++) {
859 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
78e65cd7 860 float dist1 = 0.0f, dist2 = 0.0f;
fd257dc4 861 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
78e65cd7
AC
862 FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
863 FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
988c1705
AC
864 float minthr = FFMIN(band0->threshold, band1->threshold);
865 float maxthr = FFMAX(band0->threshold, band1->threshold);
fd257dc4 866 for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
78e65cd7 867 M[i] = (sce0->coeffs[start+w2*128+i]
99d61d34 868 + sce1->coeffs[start+w2*128+i]) * 0.5;
78e65cd7
AC
869 S[i] = sce0->coeffs[start+w2*128+i]
870 - sce1->coeffs[start+w2*128+i];
871 }
872 abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
873 abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
874 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
875 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
876 dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
877 L34,
878 sce0->ics.swb_sizes[g],
879 sce0->sf_idx[(w+w2)*16+g],
880 sce0->band_type[(w+w2)*16+g],
881 lambda / band0->threshold, INFINITY, NULL);
882 dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
883 R34,
884 sce1->ics.swb_sizes[g],
885 sce1->sf_idx[(w+w2)*16+g],
886 sce1->band_type[(w+w2)*16+g],
887 lambda / band1->threshold, INFINITY, NULL);
888 dist2 += quantize_band_cost(s, M,
889 M34,
890 sce0->ics.swb_sizes[g],
891 sce0->sf_idx[(w+w2)*16+g],
892 sce0->band_type[(w+w2)*16+g],
893 lambda / maxthr, INFINITY, NULL);
894 dist2 += quantize_band_cost(s, S,
895 S34,
896 sce1->ics.swb_sizes[g],
897 sce1->sf_idx[(w+w2)*16+g],
898 sce1->band_type[(w+w2)*16+g],
899 lambda / minthr, INFINITY, NULL);
900 }
901 cpe->ms_mask[w*16+g] = dist2 < dist1;
902 }
903 start += sce0->ics.swb_sizes[g];
904 }
905 }
906}
907
908AACCoefficientsEncoder ff_aac_coders[] = {
909 {
910 search_for_quantizers_faac,
7a4eebcd 911 encode_window_bands_info,
78e65cd7 912 quantize_and_encode_band,
dd0e43e4 913 search_for_ms,
78e65cd7
AC
914 },
915 {
916 search_for_quantizers_anmr,
917 encode_window_bands_info,
918 quantize_and_encode_band,
dd0e43e4 919 search_for_ms,
78e65cd7
AC
920 },
921 {
922 search_for_quantizers_twoloop,
923 encode_window_bands_info,
924 quantize_and_encode_band,
dd0e43e4 925 search_for_ms,
78e65cd7
AC
926 },
927 {
928 search_for_quantizers_fast,
929 encode_window_bands_info,
930 quantize_and_encode_band,
dd0e43e4 931 search_for_ms,
78e65cd7
AC
932 },
933};