added skip macroblock optimization (big perf win on black regions for example)
[libav.git] / libavcodec / mpegvideo.c
CommitLineData
de6d9b64
FB
1/*
2 * The simplest mpeg encoder (well, it was the simplest!)
3 * Copyright (c) 2000,2001 Gerard Lantau.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19#include <stdlib.h>
20#include <stdio.h>
21#include <math.h>
22#include <string.h>
23#include "avcodec.h"
24#include "dsputil.h"
25#include "mpegvideo.h"
26
/* width in luma pixels of the replicated border kept around each picture */
#define EDGE_WIDTH 16

/* enable all paranoid tests for rounding, overflows, etc... */
//#define PARANOID

//#define DEBUG

/* for jpeg fast DCT */
#define CONST_BITS 14
36
/*
 * Per-coefficient scale factors used by convert_matrix() when the forward
 * DCT is jpeg_fdct_ifast. 8x8 table in natural (row-major) order.
 */
static const unsigned short aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
48
49static UINT8 h263_chroma_roundtab[16] = {
50 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
51};
52
53static void encode_picture(MpegEncContext *s, int picture_number);
54static void rate_control_init(MpegEncContext *s);
55static int rate_estimate_qscale(MpegEncContext *s);
56
57/* default motion estimation */
58int motion_estimation_method = ME_LOG;
59
/* fixed-point precision of the reciprocal quantizer tables built by
   convert_matrix(): QMAT_SHIFT for the jpeg_fdct_ifast path,
   QMAT_SHIFT_MMX for the other forward DCT */
/* XXX: should use variable shift ? */
#define QMAT_SHIFT_MMX 19
#define QMAT_SHIFT 25
63
64static void convert_matrix(int *qmat, const UINT16 *quant_matrix, int qscale)
65{
66 int i;
67
68 if (av_fdct == jpeg_fdct_ifast) {
69 for(i=0;i<64;i++) {
70 /* 16 <= qscale * quant_matrix[i] <= 7905 */
71 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
72
73 qmat[i] = (int)((1ULL << (QMAT_SHIFT + 11)) / (aanscales[i] * qscale * quant_matrix[i]));
74 }
75 } else {
76 for(i=0;i<64;i++) {
77 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
78 So 16 <= qscale * quant_matrix[i] <= 7905
79 so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905
80 */
81 qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
82 }
83 }
84}
85
86/* init common structure for both encoder and decoder */
87int MPV_common_init(MpegEncContext *s)
88{
89 int c_size, i;
90 UINT8 *pict;
91
92 s->mb_width = (s->width + 15) / 16;
93 s->mb_height = (s->height + 15) / 16;
94 s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH;
95
96 for(i=0;i<3;i++) {
97 int w, h, shift, pict_start;
98
99 w = s->linesize;
100 h = s->mb_height * 16 + 2 * EDGE_WIDTH;
101 shift = (i == 0) ? 0 : 1;
102 c_size = (w >> shift) * (h >> shift);
103 pict_start = (w >> shift) * (EDGE_WIDTH >> shift) + (EDGE_WIDTH >> shift);
104
105 pict = av_mallocz(c_size);
106 if (pict == NULL)
107 goto fail;
108 s->last_picture_base[i] = pict;
109 s->last_picture[i] = pict + pict_start;
110
111 pict = av_mallocz(c_size);
112 if (pict == NULL)
113 goto fail;
114 s->next_picture_base[i] = pict;
115 s->next_picture[i] = pict + pict_start;
116
117 if (s->has_b_frames) {
118 pict = av_mallocz(c_size);
119 if (pict == NULL)
120 goto fail;
121 s->aux_picture_base[i] = pict;
122 s->aux_picture[i] = pict + pict_start;
123 }
124 }
125
126 if (s->out_format == FMT_H263) {
127 int size;
128 /* MV prediction */
129 size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
130 s->motion_val = malloc(size * 2 * sizeof(INT16));
131 if (s->motion_val == NULL)
132 goto fail;
133 memset(s->motion_val, 0, size * 2 * sizeof(INT16));
134 }
135
136 if (s->h263_pred) {
137 int y_size, c_size, i, size;
138
139 /* dc values */
140
141 y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
142 c_size = (s->mb_width + 2) * (s->mb_height + 2);
143 size = y_size + 2 * c_size;
144 s->dc_val[0] = malloc(size * sizeof(INT16));
145 if (s->dc_val[0] == NULL)
146 goto fail;
147 s->dc_val[1] = s->dc_val[0] + y_size;
148 s->dc_val[2] = s->dc_val[1] + c_size;
149 for(i=0;i<size;i++)
150 s->dc_val[0][i] = 1024;
151
152 /* ac values */
153 s->ac_val[0] = av_mallocz(size * sizeof(INT16) * 16);
154 if (s->ac_val[0] == NULL)
155 goto fail;
156 s->ac_val[1] = s->ac_val[0] + y_size;
157 s->ac_val[2] = s->ac_val[1] + c_size;
158
159 /* cbp values */
160 s->coded_block = av_mallocz(y_size);
161 if (!s->coded_block)
162 goto fail;
163 }
164 /* default structure is frame */
165 s->picture_structure = PICT_FRAME;
166
167 /* init default q matrix (only for mpeg and mjpeg) */
168 for(i=0;i<64;i++) {
169 s->intra_matrix[i] = default_intra_matrix[i];
170 s->chroma_intra_matrix[i] = default_intra_matrix[i];
171 s->non_intra_matrix[i] = default_non_intra_matrix[i];
172 s->chroma_non_intra_matrix[i] = default_non_intra_matrix[i];
173 }
3bb4e23a
FB
174 /* init macroblock skip table */
175 if (!s->encoding) {
176 s->mbskip_table = av_mallocz(s->mb_width * s->mb_height);
177 if (!s->mbskip_table)
178 goto fail;
179 }
180
de6d9b64
FB
181 s->context_initialized = 1;
182 return 0;
183 fail:
184 if (s->motion_val)
185 free(s->motion_val);
186 if (s->dc_val[0])
187 free(s->dc_val[0]);
188 if (s->ac_val[0])
189 free(s->ac_val[0]);
190 if (s->coded_block)
191 free(s->coded_block);
3bb4e23a
FB
192 if (s->mbskip_table)
193 free(s->mbskip_table);
de6d9b64
FB
194 for(i=0;i<3;i++) {
195 if (s->last_picture_base[i])
196 free(s->last_picture_base[i]);
197 if (s->next_picture_base[i])
198 free(s->next_picture_base[i]);
199 if (s->aux_picture_base[i])
200 free(s->aux_picture_base[i]);
201 }
202 return -1;
203}
204
205/* init common structure for both encoder and decoder */
206void MPV_common_end(MpegEncContext *s)
207{
208 int i;
209
210 if (s->motion_val)
211 free(s->motion_val);
212 if (s->h263_pred) {
213 free(s->dc_val[0]);
214 free(s->ac_val[0]);
215 free(s->coded_block);
216 }
3bb4e23a
FB
217 if (s->mbskip_table)
218 free(s->mbskip_table);
de6d9b64
FB
219 for(i=0;i<3;i++) {
220 free(s->last_picture_base[i]);
221 free(s->next_picture_base[i]);
222 if (s->has_b_frames)
223 free(s->aux_picture_base[i]);
224 }
225 s->context_initialized = 0;
226}
227
228/* init video encoder */
229int MPV_encode_init(AVCodecContext *avctx)
230{
231 MpegEncContext *s = avctx->priv_data;
232
233 s->bit_rate = avctx->bit_rate;
234 s->frame_rate = avctx->frame_rate;
235 s->width = avctx->width;
236 s->height = avctx->height;
237 s->gop_size = avctx->gop_size;
238 if (s->gop_size <= 1) {
239 s->intra_only = 1;
240 s->gop_size = 12;
241 } else {
242 s->intra_only = 0;
243 }
244 s->full_search = motion_estimation_method;
245
246 s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
247
248 switch(avctx->codec->id) {
249 case CODEC_ID_MPEG1VIDEO:
250 s->out_format = FMT_MPEG1;
251 break;
252 case CODEC_ID_MJPEG:
253 s->out_format = FMT_MJPEG;
254 s->intra_only = 1; /* force intra only for jpeg */
255 if (mjpeg_init(s) < 0)
256 return -1;
257 break;
258 case CODEC_ID_H263:
259 if (h263_get_picture_format(s->width, s->height) == 7)
260 return -1;
261 s->out_format = FMT_H263;
262 break;
263 case CODEC_ID_H263P:
264 s->out_format = FMT_H263;
265 s->h263_plus = 1;
266 /* XXX: not unrectricted mv yet */
267 break;
268 case CODEC_ID_RV10:
269 s->out_format = FMT_H263;
270 s->h263_rv10 = 1;
271 break;
272 case CODEC_ID_OPENDIVX:
273 s->out_format = FMT_H263;
274 s->h263_pred = 1;
275 s->unrestricted_mv = 1;
276 break;
277 case CODEC_ID_MSMPEG4:
278 s->out_format = FMT_H263;
279 s->h263_msmpeg4 = 1;
280 s->h263_pred = 1;
281 s->unrestricted_mv = 1;
282 break;
283 default:
284 return -1;
285 }
286
287 if (s->out_format == FMT_H263)
288 h263_encode_init_vlc(s);
289
3bb4e23a
FB
290 s->encoding = 1;
291
de6d9b64
FB
292 /* init */
293 if (MPV_common_init(s) < 0)
294 return -1;
295
296 /* rate control init */
297 rate_control_init(s);
298
299 s->picture_number = 0;
300 s->fake_picture_number = 0;
301 /* motion detector init */
302 s->f_code = 1;
303
304 return 0;
305}
306
307int MPV_encode_end(AVCodecContext *avctx)
308{
309 MpegEncContext *s = avctx->priv_data;
310
311#ifdef STATS
312 print_stats();
313#endif
314 MPV_common_end(s);
315 if (s->out_format == FMT_MJPEG)
316 mjpeg_close(s);
317 return 0;
318}
319
320/* draw the edges of width 'w' of an image of size width, height */
321static void draw_edges(UINT8 *buf, int wrap, int width, int height, int w)
322{
323 UINT8 *ptr, *last_line;
324 int i;
325
326 last_line = buf + (height - 1) * wrap;
327 for(i=0;i<w;i++) {
328 /* top and bottom */
329 memcpy(buf - (i + 1) * wrap, buf, width);
330 memcpy(last_line + (i + 1) * wrap, last_line, width);
331 }
332 /* left and right */
333 ptr = buf;
334 for(i=0;i<height;i++) {
335 memset(ptr - w, ptr[0], w);
336 memset(ptr + width, ptr[width-1], w);
337 ptr += wrap;
338 }
339 /* corners */
340 for(i=0;i<w;i++) {
341 memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
342 memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
343 memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
344 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
345 }
346}
347
348
349/* generic function for encode/decode called before a frame is coded/decoded */
350void MPV_frame_start(MpegEncContext *s)
351{
352 int i;
353 UINT8 *tmp;
354
355 if (s->pict_type == B_TYPE) {
356 for(i=0;i<3;i++) {
357 s->current_picture[i] = s->aux_picture[i];
358 }
359 } else {
360 for(i=0;i<3;i++) {
361 /* swap next and last */
362 tmp = s->last_picture[i];
363 s->last_picture[i] = s->next_picture[i];
364 s->next_picture[i] = tmp;
365 s->current_picture[i] = tmp;
366 }
367 }
368}
369
370/* generic function for encode/decode called after a frame has been coded/decoded */
371void MPV_frame_end(MpegEncContext *s)
372{
373 /* draw edge for correct motion prediction if outside */
374 if (s->pict_type != B_TYPE) {
375 draw_edges(s->current_picture[0], s->linesize, s->width, s->height, EDGE_WIDTH);
376 draw_edges(s->current_picture[1], s->linesize/2, s->width/2, s->height/2, EDGE_WIDTH/2);
377 draw_edges(s->current_picture[2], s->linesize/2, s->width/2, s->height/2, EDGE_WIDTH/2);
378 }
379}
380
381int MPV_encode_picture(AVCodecContext *avctx,
382 unsigned char *buf, int buf_size, void *data)
383{
384 MpegEncContext *s = avctx->priv_data;
385 AVPicture *pict = data;
386 int i, j;
387
388 if (s->fixed_qscale)
389 s->qscale = avctx->quality;
390
391 init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
392
393 if (!s->intra_only) {
394 /* first picture of GOP is intra */
395 if ((s->picture_number % s->gop_size) == 0)
396 s->pict_type = I_TYPE;
397 else
398 s->pict_type = P_TYPE;
399 } else {
400 s->pict_type = I_TYPE;
401 }
402 avctx->key_frame = (s->pict_type == I_TYPE);
403
404 MPV_frame_start(s);
405
406 for(i=0;i<3;i++) {
407 UINT8 *src = pict->data[i];
408 UINT8 *dest = s->current_picture[i];
409 int src_wrap = pict->linesize[i];
410 int dest_wrap = s->linesize;
411 int w = s->width;
412 int h = s->height;
413
414 if (i >= 1) {
415 dest_wrap >>= 1;
416 w >>= 1;
417 h >>= 1;
418 }
419
420 for(j=0;j<h;j++) {
421 memcpy(dest, src, w);
422 dest += dest_wrap;
423 src += src_wrap;
424 }
425 s->new_picture[i] = s->current_picture[i];
426 }
427
428 encode_picture(s, s->picture_number);
429
430 MPV_frame_end(s);
431 s->picture_number++;
432
433 if (s->out_format == FMT_MJPEG)
434 mjpeg_picture_trailer(s);
435
436 flush_put_bits(&s->pb);
437 s->total_bits += (s->pb.buf_ptr - s->pb.buf) * 8;
438 avctx->quality = s->qscale;
439 return s->pb.buf_ptr - s->pb.buf;
440}
441
/* Clamp 'a' to the inclusive range [amin, amax]. */
static inline int clip(int a, int amin, int amax)
{
    if (a < amin)
        return amin;
    else if (a > amax)
        return amax;
    else
        return a;
}
451
452static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
453static int dct_quantize_mmx(MpegEncContext *s,
454 DCTELEM *block, int n,
455 int qscale);
456static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
457
458/* apply one mpeg motion vector to the three components */
459static inline void mpeg_motion(MpegEncContext *s,
460 UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
461 int dest_offset,
462 UINT8 **ref_picture, int src_offset,
463 int field_based, op_pixels_func *pix_op,
464 int motion_x, int motion_y, int h)
465{
466 UINT8 *ptr;
467 int dxy, offset, mx, my, src_x, src_y, height, linesize;
468
469 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
470 src_x = s->mb_x * 16 + (motion_x >> 1);
471 src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
472
473 /* WARNING: do no forget half pels */
474 height = s->height >> field_based;
475 src_x = clip(src_x, -16, s->width);
476 if (src_x == s->width)
477 dxy &= ~1;
478 src_y = clip(src_y, -16, height);
479 if (src_y == height)
480 dxy &= ~2;
481 linesize = s->linesize << field_based;
482 ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
483 dest_y += dest_offset;
484 pix_op[dxy](dest_y, ptr, linesize, h);
485 pix_op[dxy](dest_y + 8, ptr + 8, linesize, h);
486
487 if (s->out_format == FMT_H263) {
488 dxy = 0;
489 if ((motion_x & 3) != 0)
490 dxy |= 1;
491 if ((motion_y & 3) != 0)
492 dxy |= 2;
493 mx = motion_x >> 2;
494 my = motion_y >> 2;
495 } else {
496 mx = motion_x / 2;
497 my = motion_y / 2;
498 dxy = ((my & 1) << 1) | (mx & 1);
499 mx >>= 1;
500 my >>= 1;
501 }
502
503 src_x = s->mb_x * 8 + mx;
504 src_y = s->mb_y * (8 >> field_based) + my;
505 src_x = clip(src_x, -8, s->width >> 1);
506 if (src_x == (s->width >> 1))
507 dxy &= ~1;
508 src_y = clip(src_y, -8, height >> 1);
509 if (src_y == (height >> 1))
510 dxy &= ~2;
511
512 offset = (src_y * (linesize >> 1)) + src_x + (src_offset >> 1);
513 ptr = ref_picture[1] + offset;
514 pix_op[dxy](dest_cb + (dest_offset >> 1), ptr, linesize >> 1, h >> 1);
515 ptr = ref_picture[2] + offset;
516 pix_op[dxy](dest_cr + (dest_offset >> 1), ptr, linesize >> 1, h >> 1);
517}
518
519static inline void MPV_motion(MpegEncContext *s,
520 UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
521 int dir, UINT8 **ref_picture,
522 op_pixels_func *pix_op)
523{
524 int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
525 int mb_x, mb_y, i;
526 UINT8 *ptr, *dest;
527
528 mb_x = s->mb_x;
529 mb_y = s->mb_y;
530
531 switch(s->mv_type) {
532 case MV_TYPE_16X16:
533 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
534 ref_picture, 0,
535 0, pix_op,
536 s->mv[dir][0][0], s->mv[dir][0][1], 16);
537 break;
538 case MV_TYPE_8X8:
539 for(i=0;i<4;i++) {
540 motion_x = s->mv[dir][i][0];
541 motion_y = s->mv[dir][i][1];
542
543 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
544 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
545 src_y = mb_y * 16 + (motion_y >> 1) + ((i >> 1) & 1) * 8;
546
547 /* WARNING: do no forget half pels */
548 src_x = clip(src_x, -16, s->width);
549 if (src_x == s->width)
550 dxy &= ~1;
551 src_y = clip(src_y, -16, s->height);
552 if (src_y == s->height)
553 dxy &= ~2;
554
555 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
556 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
557 pix_op[dxy](dest, ptr, s->linesize, 8);
558 }
559 /* In case of 8X8, we construct a single chroma motion vector
560 with a special rounding */
561 mx = 0;
562 my = 0;
563 for(i=0;i<4;i++) {
564 mx += s->mv[dir][i][0];
565 my += s->mv[dir][i][1];
566 }
567 if (mx >= 0)
568 mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
569 else {
570 mx = -mx;
571 mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
572 }
573 if (my >= 0)
574 my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
575 else {
576 my = -my;
577 my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
578 }
579 dxy = ((my & 1) << 1) | (mx & 1);
580 mx >>= 1;
581 my >>= 1;
582
583 src_x = mb_x * 8 + mx;
584 src_y = mb_y * 8 + my;
585 src_x = clip(src_x, -8, s->width/2);
586 if (src_x == s->width/2)
587 dxy &= ~1;
588 src_y = clip(src_y, -8, s->height/2);
589 if (src_y == s->height/2)
590 dxy &= ~2;
591
592 offset = (src_y * (s->linesize >> 1)) + src_x;
593 ptr = ref_picture[1] + offset;
594 pix_op[dxy](dest_cb, ptr, s->linesize >> 1, 8);
595 ptr = ref_picture[2] + offset;
596 pix_op[dxy](dest_cr, ptr, s->linesize >> 1, 8);
597 break;
598 case MV_TYPE_FIELD:
599 if (s->picture_structure == PICT_FRAME) {
600 /* top field */
601 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
602 ref_picture, s->field_select[dir][0] ? s->linesize : 0,
603 1, pix_op,
604 s->mv[dir][0][0], s->mv[dir][0][1], 8);
605 /* bottom field */
606 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
607 ref_picture, s->field_select[dir][1] ? s->linesize : 0,
608 1, pix_op,
609 s->mv[dir][1][0], s->mv[dir][1][1], 8);
610 } else {
611
612
613 }
614 break;
615 }
616}
617
618
619/* put block[] to dest[] */
620static inline void put_dct(MpegEncContext *s,
621 DCTELEM *block, int i, UINT8 *dest, int line_size)
622{
623 if (!s->mpeg2)
624 dct_unquantize(s, block, i, s->qscale);
625 j_rev_dct (block);
626 put_pixels_clamped(block, dest, line_size);
627}
628
629/* add block[] to dest[] */
630static inline void add_dct(MpegEncContext *s,
631 DCTELEM *block, int i, UINT8 *dest, int line_size)
632{
633 if (s->block_last_index[i] >= 0) {
634 if (!s->mpeg2)
635 dct_unquantize(s, block, i, s->qscale);
636 j_rev_dct (block);
637 add_pixels_clamped(block, dest, line_size);
638 }
639}
640
641/* generic function called after a macroblock has been parsed by the
642 decoder or after it has been encoded by the encoder.
643
644 Important variables used:
645 s->mb_intra : true if intra macroblock
646 s->mv_dir : motion vector direction
647 s->mv_type : motion vector type
648 s->mv : motion vector
649 s->interlaced_dct : true if interlaced dct used (mpeg2)
650 */
651void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
652{
653 int mb_x, mb_y, motion_x, motion_y;
654 int dct_linesize, dct_offset;
655 op_pixels_func *op_pix;
656
657 mb_x = s->mb_x;
658 mb_y = s->mb_y;
659
660 /* update DC predictors for P macroblocks */
661 if (!s->mb_intra) {
662 if (s->h263_pred) {
663 int wrap, x, y, v;
664 wrap = 2 * s->mb_width + 2;
665 v = 1024;
666 x = 2 * mb_x + 1;
667 y = 2 * mb_y + 1;
668 s->dc_val[0][(x) + (y) * wrap] = v;
669 s->dc_val[0][(x + 1) + (y) * wrap] = v;
670 s->dc_val[0][(x) + (y + 1) * wrap] = v;
671 s->dc_val[0][(x + 1) + (y + 1) * wrap] = v;
672 /* ac pred */
673 memset(s->ac_val[0][(x) + (y) * wrap], 0, 16 * sizeof(INT16));
674 memset(s->ac_val[0][(x + 1) + (y) * wrap], 0, 16 * sizeof(INT16));
675 memset(s->ac_val[0][(x) + (y + 1) * wrap], 0, 16 * sizeof(INT16));
676 memset(s->ac_val[0][(x + 1) + (y + 1) * wrap], 0, 16 * sizeof(INT16));
677 if (s->h263_msmpeg4) {
678 s->coded_block[(x) + (y) * wrap] = 0;
679 s->coded_block[(x + 1) + (y) * wrap] = 0;
680 s->coded_block[(x) + (y + 1) * wrap] = 0;
681 s->coded_block[(x + 1) + (y + 1) * wrap] = 0;
682 }
683 /* chroma */
684 wrap = s->mb_width + 2;
685 x = mb_x + 1;
686 y = mb_y + 1;
687 s->dc_val[1][(x) + (y) * wrap] = v;
688 s->dc_val[2][(x) + (y) * wrap] = v;
689 /* ac pred */
690 memset(s->ac_val[1][(x) + (y) * wrap], 0, 16 * sizeof(INT16));
691 memset(s->ac_val[2][(x) + (y) * wrap], 0, 16 * sizeof(INT16));
692 } else {
693 s->last_dc[0] = 128 << s->intra_dc_precision;
694 s->last_dc[1] = 128 << s->intra_dc_precision;
695 s->last_dc[2] = 128 << s->intra_dc_precision;
696 }
697 }
698
699 /* update motion predictor */
700 if (s->out_format == FMT_H263) {
701 int x, y, wrap;
702
703 x = 2 * mb_x + 1;
704 y = 2 * mb_y + 1;
705 wrap = 2 * s->mb_width + 2;
706 if (s->mb_intra) {
707 motion_x = 0;
708 motion_y = 0;
709 goto motion_init;
710 } else if (s->mv_type == MV_TYPE_16X16) {
711 motion_x = s->mv[0][0][0];
712 motion_y = s->mv[0][0][1];
713 motion_init:
714 /* no update if 8X8 because it has been done during parsing */
715 s->motion_val[(x) + (y) * wrap][0] = motion_x;
716 s->motion_val[(x) + (y) * wrap][1] = motion_y;
717 s->motion_val[(x + 1) + (y) * wrap][0] = motion_x;
718 s->motion_val[(x + 1) + (y) * wrap][1] = motion_y;
719 s->motion_val[(x) + (y + 1) * wrap][0] = motion_x;
720 s->motion_val[(x) + (y + 1) * wrap][1] = motion_y;
721 s->motion_val[(x + 1) + (y + 1) * wrap][0] = motion_x;
722 s->motion_val[(x + 1) + (y + 1) * wrap][1] = motion_y;
723 }
724 }
725
726 if (!s->intra_only) {
727 UINT8 *dest_y, *dest_cb, *dest_cr;
3bb4e23a
FB
728 UINT8 *mbskip_ptr;
729
730 /* avoid copy if macroblock skipped in last frame too */
731 if (!s->encoding) {
732 mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x];
733 if (s->mb_skiped) {
734 s->mb_skiped = 0;
735 /* if previous was skipped too, then nothing to do ! */
736 if (*mbskip_ptr != 0)
737 goto the_end;
738 *mbskip_ptr = 1; /* indicate that this time we skiped it */
739 } else {
740 *mbskip_ptr = 0; /* not skipped */
741 }
742 }
de6d9b64
FB
743
744 dest_y = s->current_picture[0] + (mb_y * 16 * s->linesize) + mb_x * 16;
745 dest_cb = s->current_picture[1] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8;
746 dest_cr = s->current_picture[2] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8;
747
748 if (s->interlaced_dct) {
749 dct_linesize = s->linesize * 2;
750 dct_offset = s->linesize;
751 } else {
752 dct_linesize = s->linesize;
753 dct_offset = s->linesize * 8;
754 }
755
756 if (!s->mb_intra) {
757 /* motion handling */
758 if (!s->no_rounding)
759 op_pix = put_pixels_tab;
760 else
761 op_pix = put_no_rnd_pixels_tab;
762
763 if (s->mv_dir & MV_DIR_FORWARD) {
764 MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix);
765 if (!s->no_rounding)
766 op_pix = avg_pixels_tab;
767 else
768 op_pix = avg_no_rnd_pixels_tab;
769 }
770 if (s->mv_dir & MV_DIR_BACKWARD) {
771 MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix);
772 }
773
774 /* add dct residue */
775 add_dct(s, block[0], 0, dest_y, dct_linesize);
776 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
777 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
778 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
779
780 add_dct(s, block[4], 4, dest_cb, dct_linesize >> 1);
781 add_dct(s, block[5], 5, dest_cr, dct_linesize >> 1);
782 } else {
783 /* dct only in intra block */
784 put_dct(s, block[0], 0, dest_y, dct_linesize);
785 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
786 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
787 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
788
789 put_dct(s, block[4], 4, dest_cb, dct_linesize >> 1);
790 put_dct(s, block[5], 5, dest_cr, dct_linesize >> 1);
791 }
792 }
3bb4e23a
FB
793 the_end:
794 emms_c();
de6d9b64
FB
795}
796
797static void encode_picture(MpegEncContext *s, int picture_number)
798{
799 int mb_x, mb_y, wrap;
800 UINT8 *ptr;
801 DCTELEM block[6][64];
802 int i, motion_x, motion_y;
803
804 s->picture_number = picture_number;
805 if (!s->fixed_qscale)
806 s->qscale = rate_estimate_qscale(s);
807
808 /* precompute matrix */
809 if (s->out_format == FMT_MJPEG) {
810 /* for mjpeg, we do include qscale in the matrix */
811 s->intra_matrix[0] = default_intra_matrix[0];
812 for(i=1;i<64;i++)
813 s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3;
814 convert_matrix(s->q_intra_matrix, s->intra_matrix, 8);
815 } else {
816 convert_matrix(s->q_intra_matrix, s->intra_matrix, s->qscale);
817 convert_matrix(s->q_non_intra_matrix, s->non_intra_matrix, s->qscale);
818 }
819
820 switch(s->out_format) {
821 case FMT_MJPEG:
822 mjpeg_picture_header(s);
823 break;
824 case FMT_H263:
825 if (s->h263_msmpeg4)
826 msmpeg4_encode_picture_header(s, picture_number);
827 else if (s->h263_pred)
828 mpeg4_encode_picture_header(s, picture_number);
829 else if (s->h263_rv10)
830 rv10_encode_picture_header(s, picture_number);
831 else
832 h263_encode_picture_header(s, picture_number);
833 break;
834 case FMT_MPEG1:
835 mpeg1_encode_picture_header(s, picture_number);
836 break;
837 }
838
839 /* init last dc values */
840 /* note: quant matrix value (8) is implied here */
841 s->last_dc[0] = 128;
842 s->last_dc[1] = 128;
843 s->last_dc[2] = 128;
844 s->mb_incr = 1;
845 s->last_mv[0][0][0] = 0;
846 s->last_mv[0][0][1] = 0;
847 s->mv_type = MV_TYPE_16X16;
848 s->mv_dir = MV_DIR_FORWARD;
849
850 for(mb_y=0; mb_y < s->mb_height; mb_y++) {
851 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
852
853 s->mb_x = mb_x;
854 s->mb_y = mb_y;
855
856 /* compute motion vector and macro block type (intra or non intra) */
857 motion_x = 0;
858 motion_y = 0;
859 if (s->pict_type == P_TYPE) {
860 s->mb_intra = estimate_motion(s, mb_x, mb_y,
861 &motion_x,
862 &motion_y);
863 } else {
864 s->mb_intra = 1;
865 }
866
867 /* get the pixels */
868 wrap = s->linesize;
869 ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
870 get_pixels(block[0], ptr, wrap);
871 get_pixels(block[1], ptr + 8, wrap);
872 get_pixels(block[2], ptr + 8 * wrap, wrap);
873 get_pixels(block[3], ptr + 8 * wrap + 8, wrap);
874 wrap = s->linesize >> 1;
875 ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
876 get_pixels(block[4], ptr, wrap);
877
878 wrap = s->linesize >> 1;
879 ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
880 get_pixels(block[5], ptr, wrap);
881
882 /* subtract previous frame if non intra */
883 if (!s->mb_intra) {
884 int dxy, offset, mx, my;
885
886 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
887 ptr = s->last_picture[0] +
888 ((mb_y * 16 + (motion_y >> 1)) * s->linesize) +
889 (mb_x * 16 + (motion_x >> 1));
890
891 sub_pixels_2(block[0], ptr, s->linesize, dxy);
892 sub_pixels_2(block[1], ptr + 8, s->linesize, dxy);
893 sub_pixels_2(block[2], ptr + s->linesize * 8, s->linesize, dxy);
894 sub_pixels_2(block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy);
895
896 if (s->out_format == FMT_H263) {
897 /* special rounding for h263 */
898 dxy = 0;
899 if ((motion_x & 3) != 0)
900 dxy |= 1;
901 if ((motion_y & 3) != 0)
902 dxy |= 2;
903 mx = motion_x >> 2;
904 my = motion_y >> 2;
905 } else {
906 mx = motion_x / 2;
907 my = motion_y / 2;
908 dxy = ((my & 1) << 1) | (mx & 1);
909 mx >>= 1;
910 my >>= 1;
911 }
912 offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx);
913 ptr = s->last_picture[1] + offset;
914 sub_pixels_2(block[4], ptr, s->linesize >> 1, dxy);
915 ptr = s->last_picture[2] + offset;
916 sub_pixels_2(block[5], ptr, s->linesize >> 1, dxy);
917 }
3bb4e23a 918 emms_c();
de6d9b64
FB
919
920 /* DCT & quantize */
921 if (s->h263_msmpeg4) {
922 msmpeg4_dc_scale(s);
923 } else if (s->h263_pred) {
924 h263_dc_scale(s);
925 } else {
926 /* default quantization values */
927 s->y_dc_scale = 8;
928 s->c_dc_scale = 8;
929 }
930
931 for(i=0;i<6;i++) {
932 int last_index;
933 if (av_fdct == jpeg_fdct_ifast)
934 last_index = dct_quantize(s, block[i], i, s->qscale);
935 else
936 last_index = dct_quantize_mmx(s, block[i], i, s->qscale);
937 s->block_last_index[i] = last_index;
938 }
939
940 /* huffman encode */
941 switch(s->out_format) {
942 case FMT_MPEG1:
943 mpeg1_encode_mb(s, block, motion_x, motion_y);
944 break;
945 case FMT_H263:
946 if (s->h263_msmpeg4)
947 msmpeg4_encode_mb(s, block, motion_x, motion_y);
948 else
949 h263_encode_mb(s, block, motion_x, motion_y);
950 break;
951 case FMT_MJPEG:
952 mjpeg_encode_mb(s, block);
953 break;
954 }
955
956 /* decompress blocks so that we keep the state of the decoder */
957 s->mv[0][0][0] = motion_x;
958 s->mv[0][0][1] = motion_y;
959
960 MPV_decode_mb(s, block);
961 }
962 }
963}
964
965static int dct_quantize(MpegEncContext *s,
966 DCTELEM *block, int n,
967 int qscale)
968{
969 int i, j, level, last_non_zero, q;
970 const int *qmat;
971
972 av_fdct (block);
973
974 if (s->mb_intra) {
975 if (n < 4)
976 q = s->y_dc_scale;
977 else
978 q = s->c_dc_scale;
979 q = q << 3;
980
981 /* note: block[0] is assumed to be positive */
982 block[0] = (block[0] + (q >> 1)) / q;
983 i = 1;
984 last_non_zero = 0;
985 if (s->out_format == FMT_H263) {
986 qmat = s->q_non_intra_matrix;
987 } else {
988 qmat = s->q_intra_matrix;
989 }
990 } else {
991 i = 0;
992 last_non_zero = -1;
993 qmat = s->q_non_intra_matrix;
994 }
995
996 for(;i<64;i++) {
997 j = zigzag_direct[i];
998 level = block[j];
999 level = level * qmat[j];
1000#ifdef PARANOID
1001 {
1002 static int count = 0;
1003 int level1, level2, qmat1;
1004 double val;
1005 if (qmat == s->q_non_intra_matrix) {
1006 qmat1 = default_non_intra_matrix[j] * s->qscale;
1007 } else {
1008 qmat1 = default_intra_matrix[j] * s->qscale;
1009 }
1010 if (av_fdct != jpeg_fdct_ifast)
1011 val = ((double)block[j] * 8.0) / (double)qmat1;
1012 else
1013 val = ((double)block[j] * 8.0 * 2048.0) /
1014 ((double)qmat1 * aanscales[j]);
1015 level1 = (int)val;
1016 level2 = level / (1 << (QMAT_SHIFT - 3));
1017 if (level1 != level2) {
1018 fprintf(stderr, "%d: quant error qlevel=%d wanted=%d level=%d qmat1=%d qmat=%d wantedf=%0.6f\n",
1019 count, level2, level1, block[j], qmat1, qmat[j],
1020 val);
1021 count++;
1022 }
1023
1024 }
1025#endif
1026 /* XXX: slight error for the low range. Test should be equivalent to
1027 (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 <<
1028 (QMAT_SHIFT - 3)))
1029 */
1030 if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) !=
1031 level) {
1032 level = level / (1 << (QMAT_SHIFT - 3));
1033 /* XXX: currently, this code is not optimal. the range should be:
1034 mpeg1: -255..255
1035 mpeg2: -2048..2047
1036 h263: -128..127
1037 mpeg4: -2048..2047
1038 */
1039 if (level > 127)
1040 level = 127;
1041 else if (level < -128)
1042 level = -128;
1043 block[j] = level;
1044 last_non_zero = i;
1045 } else {
1046 block[j] = 0;
1047 }
1048 }
1049 return last_non_zero;
1050}
1051
1052static int dct_quantize_mmx(MpegEncContext *s,
1053 DCTELEM *block, int n,
1054 int qscale)
1055{
1056 int i, j, level, last_non_zero, q;
1057 const int *qmat;
1058
1059 av_fdct (block);
1060
1061 if (s->mb_intra) {
1062 if (n < 4)
1063 q = s->y_dc_scale;
1064 else
1065 q = s->c_dc_scale;
1066
1067 /* note: block[0] is assumed to be positive */
1068 block[0] = (block[0] + (q >> 1)) / q;
1069 i = 1;
1070 last_non_zero = 0;
1071 if (s->out_format == FMT_H263) {
1072 qmat = s->q_non_intra_matrix;
1073 } else {
1074 qmat = s->q_intra_matrix;
1075 }
1076 } else {
1077 i = 0;
1078 last_non_zero = -1;
1079 qmat = s->q_non_intra_matrix;
1080 }
1081
1082 for(;i<64;i++) {
1083 j = zigzag_direct[i];
1084 level = block[j];
1085 level = level * qmat[j];
1086 /* XXX: slight error for the low range. Test should be equivalent to
1087 (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 <<
1088 (QMAT_SHIFT_MMX - 3)))
1089 */
1090 if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) !=
1091 level) {
1092 level = level / (1 << (QMAT_SHIFT_MMX - 3));
1093 /* XXX: currently, this code is not optimal. the range should be:
1094 mpeg1: -255..255
1095 mpeg2: -2048..2047
1096 h263: -128..127
1097 mpeg4: -2048..2047
1098 */
1099 if (level > 127)
1100 level = 127;
1101 else if (level < -128)
1102 level = -128;
1103 block[j] = level;
1104 last_non_zero = i;
1105 } else {
1106 block[j] = 0;
1107 }
1108 }
1109 return last_non_zero;
1110}
1111
1112static void dct_unquantize(MpegEncContext *s,
1113 DCTELEM *block, int n, int qscale)
1114{
1115 int i, level;
1116 const UINT16 *quant_matrix;
1117
1118 if (s->mb_intra) {
1119 if (n < 4)
1120 block[0] = block[0] * s->y_dc_scale;
1121 else
1122 block[0] = block[0] * s->c_dc_scale;
1123 if (s->out_format == FMT_H263) {
1124 i = 1;
1125 goto unquant_even;
1126 }
1127 /* XXX: only mpeg1 */
1128 quant_matrix = s->intra_matrix;
1129 for(i=1;i<64;i++) {
1130 level = block[i];
1131 if (level) {
1132 if (level < 0) {
1133 level = -level;
1134 level = (int)(level * qscale * quant_matrix[i]) >> 3;
1135 level = (level - 1) | 1;
1136 level = -level;
1137 } else {
1138 level = (int)(level * qscale * quant_matrix[i]) >> 3;
1139 level = (level - 1) | 1;
1140 }
1141#ifdef PARANOID
1142 if (level < -2048 || level > 2047)
1143 fprintf(stderr, "unquant error %d %d\n", i, level);
1144#endif
1145 block[i] = level;
1146 }
1147 }
1148 } else {
1149 i = 0;
1150 unquant_even:
1151 quant_matrix = s->non_intra_matrix;
1152 for(;i<64;i++) {
1153 level = block[i];
1154 if (level) {
1155 if (level < 0) {
1156 level = -level;
1157 level = (((level << 1) + 1) * qscale *
1158 ((int) (quant_matrix[i]))) >> 4;
1159 level = (level - 1) | 1;
1160 level = -level;
1161 } else {
1162 level = (((level << 1) + 1) * qscale *
1163 ((int) (quant_matrix[i]))) >> 4;
1164 level = (level - 1) | 1;
1165 }
1166#ifdef PARANOID
1167 if (level < -2048 || level > 2047)
1168 fprintf(stderr, "unquant error %d %d\n", i, level);
1169#endif
1170 block[i] = level;
1171 }
1172 }
1173 }
1174}
1175
1176
1177/* rate control */
1178
/* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame: the I frame bit
   budget is this many times the P frame budget, and the qscale estimated
   for an I frame is divided by it */
#define I_FRAME_SIZE_RATIO 3.0
/* gain of the qscale estimation: the estimate moves by one step for every
   QSCALE_K * mb_width * mb_height bits of difference between the wanted
   and the produced number of bits */
#define QSCALE_K 20
1182
1183static void rate_control_init(MpegEncContext *s)
1184{
1185 s->wanted_bits = 0;
1186
1187 if (s->intra_only) {
1188 s->I_frame_bits = ((INT64)s->bit_rate * FRAME_RATE_BASE) / s->frame_rate;
1189 s->P_frame_bits = s->I_frame_bits;
1190 } else {
1191 s->P_frame_bits = (int) ((float)(s->gop_size * s->bit_rate) /
1192 (float)((float)s->frame_rate / FRAME_RATE_BASE * (I_FRAME_SIZE_RATIO + s->gop_size - 1)));
1193 s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO);
1194 }
1195
1196#if defined(DEBUG)
1197 printf("I_frame_size=%d P_frame_size=%d\n",
1198 s->I_frame_bits, s->P_frame_bits);
1199#endif
1200}
1201
1202
1203/*
1204 * This heuristic is rather poor, but at least we do not have to
1205 * change the qscale at every macroblock.
1206 */
1207static int rate_estimate_qscale(MpegEncContext *s)
1208{
1209 long long total_bits = s->total_bits;
1210 float q;
1211 int qscale, diff, qmin;
1212
1213 if (s->pict_type == I_TYPE) {
1214 s->wanted_bits += s->I_frame_bits;
1215 } else {
1216 s->wanted_bits += s->P_frame_bits;
1217 }
1218 diff = s->wanted_bits - total_bits;
1219 q = 31.0 - (float)diff / (QSCALE_K * s->mb_height * s->mb_width);
1220 /* adjust for I frame */
1221 if (s->pict_type == I_TYPE && !s->intra_only) {
1222 q /= I_FRAME_SIZE_RATIO;
1223 }
1224
1225 /* using a too small Q scale leeds to problems in mpeg1 and h263
1226 because AC coefficients are clamped to 255 or 127 */
1227 qmin = 3;
1228 if (q < qmin)
1229 q = qmin;
1230 else if (q > 31)
1231 q = 31;
1232 qscale = (int)(q + 0.5);
1233#if defined(DEBUG)
1234 printf("%d: total=%Ld br=%0.1f diff=%d qest=%0.1f\n",
1235 s->picture_number,
1236 total_bits,
1237 (float)s->frame_rate / FRAME_RATE_BASE *
1238 total_bits / s->picture_number,
1239 diff, q);
1240#endif
1241 return qscale;
1242}
1243
1244AVCodec mpeg1video_encoder = {
1245 "mpeg1video",
1246 CODEC_TYPE_VIDEO,
1247 CODEC_ID_MPEG1VIDEO,
1248 sizeof(MpegEncContext),
1249 MPV_encode_init,
1250 MPV_encode_picture,
1251 MPV_encode_end,
1252};
1253
1254AVCodec h263_encoder = {
1255 "h263",
1256 CODEC_TYPE_VIDEO,
1257 CODEC_ID_H263,
1258 sizeof(MpegEncContext),
1259 MPV_encode_init,
1260 MPV_encode_picture,
1261 MPV_encode_end,
1262};
1263
1264AVCodec h263p_encoder = {
1265 "h263p",
1266 CODEC_TYPE_VIDEO,
1267 CODEC_ID_H263P,
1268 sizeof(MpegEncContext),
1269 MPV_encode_init,
1270 MPV_encode_picture,
1271 MPV_encode_end,
1272};
1273
1274AVCodec rv10_encoder = {
1275 "rv10",
1276 CODEC_TYPE_VIDEO,
1277 CODEC_ID_RV10,
1278 sizeof(MpegEncContext),
1279 MPV_encode_init,
1280 MPV_encode_picture,
1281 MPV_encode_end,
1282};
1283
1284AVCodec mjpeg_encoder = {
1285 "mjpeg",
1286 CODEC_TYPE_VIDEO,
1287 CODEC_ID_MJPEG,
1288 sizeof(MpegEncContext),
1289 MPV_encode_init,
1290 MPV_encode_picture,
1291 MPV_encode_end,
1292};
1293
1294AVCodec opendivx_encoder = {
1295 "opendivx",
1296 CODEC_TYPE_VIDEO,
1297 CODEC_ID_OPENDIVX,
1298 sizeof(MpegEncContext),
1299 MPV_encode_init,
1300 MPV_encode_picture,
1301 MPV_encode_end,
1302};
1303
1304AVCodec msmpeg4_encoder = {
1305 "msmpeg4",
1306 CODEC_TYPE_VIDEO,
1307 CODEC_ID_MSMPEG4,
1308 sizeof(MpegEncContext),
1309 MPV_encode_init,
1310 MPV_encode_picture,
1311 MPV_encode_end,
1312};