/*
 * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Sorenson Vector Quantizer #1 (SVQ1) video codec.
 * For more information about the SVQ1 algorithm, visit:
 *   http://www.pcisys.net/~melanson/codecs/
 */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <string.h>

#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "h263.h"
#include "internal.h"
#include "svq1.h"
#include "svq1enc_cb.h"

/* Keep assert() active in every build configuration. */
#undef NDEBUG
#include <assert.h>
40 typedef struct SVQ1Context
{
41 /* FIXME: Needed for motion estimation, should not be used for anything
42 * else, the idea is to make the motion estimation eventually independent
43 * of MpegEncContext, so this will be removed then. */
45 AVCodecContext
*avctx
;
48 AVFrame current_picture
;
53 /* why ooh why this sick breadth first order,
54 * everything is slower and more complex */
55 PutBitContext reorder_pb
[6];
60 /* Y plane block dimensions */
64 /* U & V plane (C planes) block dimensions */
70 int16_t (*motion_val8
[3])[2];
71 int16_t (*motion_val16
[3])[2];
78 static void svq1_write_header(SVQ1Context
*s
, int frame_type
)
83 put_bits(&s
->pb
, 22, 0x20);
85 /* temporal reference (sure hope this is a "don't care") */
86 put_bits(&s
->pb
, 8, 0x00);
89 put_bits(&s
->pb
, 2, frame_type
- 1);
91 if (frame_type
== AV_PICTURE_TYPE_I
) {
92 /* no checksum since frame code is 0x20 */
93 /* no embedded string either */
94 /* output 5 unknown bits (2 + 2 + 1) */
95 put_bits(&s
->pb
, 5, 2); /* 2 needed by quicktime decoder */
97 i
= ff_match_2uint16(ff_svq1_frame_size_table
,
98 FF_ARRAY_ELEMS(ff_svq1_frame_size_table
),
99 s
->frame_width
, s
->frame_height
);
100 put_bits(&s
->pb
, 3, i
);
103 put_bits(&s
->pb
, 12, s
->frame_width
);
104 put_bits(&s
->pb
, 12, s
->frame_height
);
108 /* no checksum or extra data (next 2 bits get 0) */
109 put_bits(&s
->pb
, 2, 0);
/* Base value and per-level scale factor of the level threshold table. */
#define QUALITY_THRESHOLD    100
#define THRESHOLD_MULTIPLIER 0.6
115 static int encode_block(SVQ1Context
*s
, uint8_t *src
, uint8_t *ref
,
116 uint8_t *decoded
, int stride
, int level
,
117 int threshold
, int lambda
, int intra
)
119 int count
, y
, x
, i
, j
, split
, best_mean
, best_score
, best_count
;
121 int block_sum
[7] = { 0, 0, 0, 0, 0, 0 };
122 int w
= 2 << (level
+ 2 >> 1);
123 int h
= 2 << (level
+ 1 >> 1);
125 int16_t block
[7][256];
126 const int8_t *codebook_sum
, *codebook
;
127 const uint16_t(*mean_vlc
)[2];
128 const uint8_t(*multistage_vlc
)[2];
131 // FIXME: Optimize, this does not need to be done multiple times.
133 codebook_sum
= svq1_intra_codebook_sum
[level
];
134 codebook
= ff_svq1_intra_codebooks
[level
];
135 mean_vlc
= ff_svq1_intra_mean_vlc
;
136 multistage_vlc
= ff_svq1_intra_multistage_vlc
[level
];
137 for (y
= 0; y
< h
; y
++) {
138 for (x
= 0; x
< w
; x
++) {
139 int v
= src
[x
+ y
* stride
];
140 block
[0][x
+ w
* y
] = v
;
146 codebook_sum
= svq1_inter_codebook_sum
[level
];
147 codebook
= ff_svq1_inter_codebooks
[level
];
148 mean_vlc
= ff_svq1_inter_mean_vlc
+ 256;
149 multistage_vlc
= ff_svq1_inter_multistage_vlc
[level
];
150 for (y
= 0; y
< h
; y
++) {
151 for (x
= 0; x
< w
; x
++) {
152 int v
= src
[x
+ y
* stride
] - ref
[x
+ y
* stride
];
153 block
[0][x
+ w
* y
] = v
;
161 best_score
-= (int)((unsigned)block_sum
[0] * block_sum
[0] >> (level
+ 3));
162 best_mean
= block_sum
[0] + (size
>> 1) >> (level
+ 3);
165 for (count
= 1; count
< 7; count
++) {
166 int best_vector_score
= INT_MAX
;
167 int best_vector_sum
= -999, best_vector_mean
= -999;
168 const int stage
= count
- 1;
169 const int8_t *vector
;
171 for (i
= 0; i
< 16; i
++) {
172 int sum
= codebook_sum
[stage
* 16 + i
];
173 int sqr
, diff
, score
;
175 vector
= codebook
+ stage
* size
* 16 + i
* size
;
176 sqr
= s
->dsp
.ssd_int8_vs_int16(vector
, block
[stage
], size
);
177 diff
= block_sum
[stage
] - sum
;
178 score
= sqr
- (diff
* (int64_t)diff
>> (level
+ 3)); // FIXME: 64bit slooow
179 if (score
< best_vector_score
) {
180 int mean
= diff
+ (size
>> 1) >> (level
+ 3);
181 assert(mean
> -300 && mean
< 300);
182 mean
= av_clip(mean
, intra ?
0 : -256, 255);
183 best_vector_score
= score
;
184 best_vector
[stage
] = i
;
185 best_vector_sum
= sum
;
186 best_vector_mean
= mean
;
189 assert(best_vector_mean
!= -999);
190 vector
= codebook
+ stage
* size
* 16 + best_vector
[stage
] * size
;
191 for (j
= 0; j
< size
; j
++)
192 block
[stage
+ 1][j
] = block
[stage
][j
] - vector
[j
];
193 block_sum
[stage
+ 1] = block_sum
[stage
] - best_vector_sum
;
194 best_vector_score
+= lambda
*
196 multistage_vlc
[1 + count
][1]
197 + mean_vlc
[best_vector_mean
][1]);
199 if (best_vector_score
< best_score
) {
200 best_score
= best_vector_score
;
202 best_mean
= best_vector_mean
;
208 if (best_score
> threshold
&& level
) {
210 int offset
= level
& 1 ? stride
* h
/ 2 : w
/ 2;
211 PutBitContext backup
[6];
213 for (i
= level
- 1; i
>= 0; i
--)
214 backup
[i
] = s
->reorder_pb
[i
];
215 score
+= encode_block(s
, src
, ref
, decoded
, stride
, level
- 1,
216 threshold
>> 1, lambda
, intra
);
217 score
+= encode_block(s
, src
+ offset
, ref
+ offset
, decoded
+ offset
,
218 stride
, level
- 1, threshold
>> 1, lambda
, intra
);
221 if (score
< best_score
) {
225 for (i
= level
- 1; i
>= 0; i
--)
226 s
->reorder_pb
[i
] = backup
[i
];
230 put_bits(&s
->reorder_pb
[level
], 1, split
);
233 assert(best_mean
>= 0 && best_mean
< 256 || !intra
);
234 assert(best_mean
>= -256 && best_mean
< 256);
235 assert(best_count
>= 0 && best_count
< 7);
236 assert(level
< 4 || best_count
== 0);
238 /* output the encoding */
239 put_bits(&s
->reorder_pb
[level
],
240 multistage_vlc
[1 + best_count
][1],
241 multistage_vlc
[1 + best_count
][0]);
242 put_bits(&s
->reorder_pb
[level
], mean_vlc
[best_mean
][1],
243 mean_vlc
[best_mean
][0]);
245 for (i
= 0; i
< best_count
; i
++) {
246 assert(best_vector
[i
] >= 0 && best_vector
[i
] < 16);
247 put_bits(&s
->reorder_pb
[level
], 4, best_vector
[i
]);
250 for (y
= 0; y
< h
; y
++)
251 for (x
= 0; x
< w
; x
++)
252 decoded
[x
+ y
* stride
] = src
[x
+ y
* stride
] -
253 block
[best_count
][x
+ w
* y
] +
260 static int svq1_encode_plane(SVQ1Context
*s
, int plane
,
261 unsigned char *src_plane
,
262 unsigned char *ref_plane
,
263 unsigned char *decoded_plane
,
264 int width
, int height
, int src_stride
, int stride
)
268 int block_width
, block_height
;
271 uint8_t *src
= s
->scratchbuf
+ stride
* 16;
272 const int lambda
= (s
->picture
.quality
* s
->picture
.quality
) >>
273 (2 * FF_LAMBDA_SHIFT
);
275 /* figure out the acceptable level thresholds in advance */
276 threshold
[5] = QUALITY_THRESHOLD
;
277 for (level
= 4; level
>= 0; level
--)
278 threshold
[level
] = threshold
[level
+ 1] * THRESHOLD_MULTIPLIER
;
280 block_width
= (width
+ 15) / 16;
281 block_height
= (height
+ 15) / 16;
283 if (s
->picture
.pict_type
== AV_PICTURE_TYPE_P
) {
284 s
->m
.avctx
= s
->avctx
;
285 s
->m
.current_picture_ptr
= &s
->m
.current_picture
;
286 s
->m
.last_picture_ptr
= &s
->m
.last_picture
;
287 s
->m
.last_picture
.f
.data
[0] = ref_plane
;
289 s
->m
.last_picture
.f
.linesize
[0] =
290 s
->m
.new_picture
.f
.linesize
[0] =
291 s
->m
.current_picture
.f
.linesize
[0] = stride
;
293 s
->m
.height
= height
;
294 s
->m
.mb_width
= block_width
;
295 s
->m
.mb_height
= block_height
;
296 s
->m
.mb_stride
= s
->m
.mb_width
+ 1;
297 s
->m
.b8_stride
= 2 * s
->m
.mb_width
+ 1;
299 s
->m
.pict_type
= s
->picture
.pict_type
;
300 s
->m
.me_method
= s
->avctx
->me_method
;
301 s
->m
.me
.scene_change_score
= 0;
302 s
->m
.flags
= s
->avctx
->flags
;
303 // s->m.out_format = FMT_H263;
304 // s->m.unrestricted_mv = 1;
305 s
->m
.lambda
= s
->picture
.quality
;
306 s
->m
.qscale
= s
->m
.lambda
* 139 +
307 FF_LAMBDA_SCALE
* 64 >>
309 s
->m
.lambda2
= s
->m
.lambda
* s
->m
.lambda
+
310 FF_LAMBDA_SCALE
/ 2 >>
313 if (!s
->motion_val8
[plane
]) {
314 s
->motion_val8
[plane
] = av_mallocz((s
->m
.b8_stride
*
315 block_height
* 2 + 2) *
316 2 * sizeof(int16_t));
317 s
->motion_val16
[plane
] = av_mallocz((s
->m
.mb_stride
*
318 (block_height
+ 2) + 1) *
319 2 * sizeof(int16_t));
322 s
->m
.mb_type
= s
->mb_type
;
324 // dummies, to avoid segfaults
325 s
->m
.current_picture
.mb_mean
= (uint8_t *)s
->dummy
;
326 s
->m
.current_picture
.mb_var
= (uint16_t *)s
->dummy
;
327 s
->m
.current_picture
.mc_mb_var
= (uint16_t *)s
->dummy
;
328 s
->m
.current_picture
.f
.mb_type
= s
->dummy
;
330 s
->m
.current_picture
.f
.motion_val
[0] = s
->motion_val8
[plane
] + 2;
331 s
->m
.p_mv_table
= s
->motion_val16
[plane
] +
333 s
->m
.dsp
= s
->dsp
; // move
336 s
->m
.me
.dia_size
= s
->avctx
->dia_size
;
337 s
->m
.first_slice_line
= 1;
338 for (y
= 0; y
< block_height
; y
++) {
339 s
->m
.new_picture
.f
.data
[0] = src
- y
* 16 * stride
; // ugly
342 for (i
= 0; i
< 16 && i
+ 16 * y
< height
; i
++) {
343 memcpy(&src
[i
* stride
], &src_plane
[(i
+ 16 * y
) * src_stride
],
345 for (x
= width
; x
< 16 * block_width
; x
++)
346 src
[i
* stride
+ x
] = src
[i
* stride
+ x
- 1];
348 for (; i
< 16 && i
+ 16 * y
< 16 * block_height
; i
++)
349 memcpy(&src
[i
* stride
], &src
[(i
- 1) * stride
],
352 for (x
= 0; x
< block_width
; x
++) {
354 ff_init_block_index(&s
->m
);
355 ff_update_block_index(&s
->m
);
357 ff_estimate_p_frame_motion(&s
->m
, x
, y
);
359 s
->m
.first_slice_line
= 0;
362 ff_fix_long_p_mvs(&s
->m
);
363 ff_fix_long_mvs(&s
->m
, NULL
, 0, s
->m
.p_mv_table
, s
->m
.f_code
,
364 CANDIDATE_MB_TYPE_INTER
, 0);
367 s
->m
.first_slice_line
= 1;
368 for (y
= 0; y
< block_height
; y
++) {
369 for (i
= 0; i
< 16 && i
+ 16 * y
< height
; i
++) {
370 memcpy(&src
[i
* stride
], &src_plane
[(i
+ 16 * y
) * src_stride
],
372 for (x
= width
; x
< 16 * block_width
; x
++)
373 src
[i
* stride
+ x
] = src
[i
* stride
+ x
- 1];
375 for (; i
< 16 && i
+ 16 * y
< 16 * block_height
; i
++)
376 memcpy(&src
[i
* stride
], &src
[(i
- 1) * stride
], 16 * block_width
);
379 for (x
= 0; x
< block_width
; x
++) {
380 uint8_t reorder_buffer
[3][6][7 * 32];
382 int offset
= y
* 16 * stride
+ x
* 16;
383 uint8_t *decoded
= decoded_plane
+ offset
;
384 uint8_t *ref
= ref_plane
+ offset
;
385 int score
[4] = { 0, 0, 0, 0 }, best
;
386 uint8_t *temp
= s
->scratchbuf
;
388 if (s
->pb
.buf_end
- s
->pb
.buf
-
389 (put_bits_count(&s
->pb
) >> 3) < 3000) { // FIXME: check size
390 av_log(s
->avctx
, AV_LOG_ERROR
, "encoded frame too large\n");
395 ff_init_block_index(&s
->m
);
396 ff_update_block_index(&s
->m
);
398 if (s
->picture
.pict_type
== AV_PICTURE_TYPE_I
||
399 (s
->m
.mb_type
[x
+ y
* s
->m
.mb_stride
] &
400 CANDIDATE_MB_TYPE_INTRA
)) {
401 for (i
= 0; i
< 6; i
++)
402 init_put_bits(&s
->reorder_pb
[i
], reorder_buffer
[0][i
],
404 if (s
->picture
.pict_type
== AV_PICTURE_TYPE_P
) {
405 const uint8_t *vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_INTRA
];
406 put_bits(&s
->reorder_pb
[5], vlc
[1], vlc
[0]);
407 score
[0] = vlc
[1] * lambda
;
409 score
[0] += encode_block(s
, src
+ 16 * x
, NULL
, temp
, stride
,
411 for (i
= 0; i
< 6; i
++) {
412 count
[0][i
] = put_bits_count(&s
->reorder_pb
[i
]);
413 flush_put_bits(&s
->reorder_pb
[i
]);
420 if (s
->picture
.pict_type
== AV_PICTURE_TYPE_P
) {
421 const uint8_t *vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_INTER
];
422 int mx
, my
, pred_x
, pred_y
, dxy
;
425 motion_ptr
= ff_h263_pred_motion(&s
->m
, 0, 0, &pred_x
, &pred_y
);
426 if (s
->m
.mb_type
[x
+ y
* s
->m
.mb_stride
] &
427 CANDIDATE_MB_TYPE_INTER
) {
428 for (i
= 0; i
< 6; i
++)
429 init_put_bits(&s
->reorder_pb
[i
], reorder_buffer
[1][i
],
432 put_bits(&s
->reorder_pb
[5], vlc
[1], vlc
[0]);
434 s
->m
.pb
= s
->reorder_pb
[5];
437 assert(mx
>= -32 && mx
<= 31);
438 assert(my
>= -32 && my
<= 31);
439 assert(pred_x
>= -32 && pred_x
<= 31);
440 assert(pred_y
>= -32 && pred_y
<= 31);
441 ff_h263_encode_motion(&s
->m
, mx
- pred_x
, 1);
442 ff_h263_encode_motion(&s
->m
, my
- pred_y
, 1);
443 s
->reorder_pb
[5] = s
->m
.pb
;
444 score
[1] += lambda
* put_bits_count(&s
->reorder_pb
[5]);
446 dxy
= (mx
& 1) + 2 * (my
& 1);
448 s
->dsp
.put_pixels_tab
[0][dxy
](temp
+ 16,
453 score
[1] += encode_block(s
, src
+ 16 * x
, temp
+ 16,
454 decoded
, stride
, 5, 64, lambda
, 0);
455 best
= score
[1] <= score
[0];
457 vlc
= ff_svq1_block_type_vlc
[SVQ1_BLOCK_SKIP
];
458 score
[2] = s
->dsp
.sse
[0](NULL
, src
+ 16 * x
, ref
,
460 score
[2] += vlc
[1] * lambda
;
461 if (score
[2] < score
[best
] && mx
== 0 && my
== 0) {
463 s
->dsp
.put_pixels_tab
[0][0](decoded
, ref
, stride
, 16);
464 for (i
= 0; i
< 6; i
++)
466 put_bits(&s
->pb
, vlc
[1], vlc
[0]);
471 for (i
= 0; i
< 6; i
++) {
472 count
[1][i
] = put_bits_count(&s
->reorder_pb
[i
]);
473 flush_put_bits(&s
->reorder_pb
[i
]);
480 motion_ptr
[0 + 2 * s
->m
.b8_stride
] =
481 motion_ptr
[1 + 2 * s
->m
.b8_stride
] =
482 motion_ptr
[2 + 2 * s
->m
.b8_stride
] =
483 motion_ptr
[3 + 2 * s
->m
.b8_stride
] = 0;
487 s
->rd_total
+= score
[best
];
489 for (i
= 5; i
>= 0; i
--)
490 avpriv_copy_bits(&s
->pb
, reorder_buffer
[best
][i
],
493 s
->dsp
.put_pixels_tab
[0][0](decoded
, temp
, stride
, 16);
495 s
->m
.first_slice_line
= 0;
500 static av_cold
int svq1_encode_init(AVCodecContext
*avctx
)
502 SVQ1Context
*const s
= avctx
->priv_data
;
504 ff_dsputil_init(&s
->dsp
, avctx
);
505 avctx
->coded_frame
= &s
->picture
;
507 s
->frame_width
= avctx
->width
;
508 s
->frame_height
= avctx
->height
;
510 s
->y_block_width
= (s
->frame_width
+ 15) / 16;
511 s
->y_block_height
= (s
->frame_height
+ 15) / 16;
513 s
->c_block_width
= (s
->frame_width
/ 4 + 15) / 16;
514 s
->c_block_height
= (s
->frame_height
/ 4 + 15) / 16;
518 s
->m
.picture_structure
= PICT_FRAME
;
520 s
->m
.me
.scratchpad
= av_mallocz((avctx
->width
+ 64) *
521 2 * 16 * 2 * sizeof(uint8_t));
522 s
->m
.me
.map
= av_mallocz(ME_MAP_SIZE
* sizeof(uint32_t));
523 s
->m
.me
.score_map
= av_mallocz(ME_MAP_SIZE
* sizeof(uint32_t));
524 s
->mb_type
= av_mallocz((s
->y_block_width
+ 1) *
525 s
->y_block_height
* sizeof(int16_t));
526 s
->dummy
= av_mallocz((s
->y_block_width
+ 1) *
527 s
->y_block_height
* sizeof(int32_t));
528 ff_h263_encode_init(&s
->m
); // mv_penalty
533 static int svq1_encode_frame(AVCodecContext
*avctx
, AVPacket
*pkt
,
534 const AVFrame
*pict
, int *got_packet
)
536 SVQ1Context
*const s
= avctx
->priv_data
;
537 AVFrame
*const p
= &s
->picture
;
542 (ret
= av_new_packet(pkt
, s
->y_block_width
* s
->y_block_height
*
543 MAX_MB_BYTES
* 3 + FF_MIN_BUFFER_SIZE
) < 0)) {
544 av_log(avctx
, AV_LOG_ERROR
, "Error getting output packet.\n");
548 if (avctx
->pix_fmt
!= AV_PIX_FMT_YUV410P
) {
549 av_log(avctx
, AV_LOG_ERROR
, "unsupported pixel format\n");
553 if (!s
->current_picture
.data
[0]) {
554 ff_get_buffer(avctx
, &s
->current_picture
);
555 ff_get_buffer(avctx
, &s
->last_picture
);
556 s
->scratchbuf
= av_malloc(s
->current_picture
.linesize
[0] * 16 * 2);
559 temp
= s
->current_picture
;
560 s
->current_picture
= s
->last_picture
;
561 s
->last_picture
= temp
;
563 init_put_bits(&s
->pb
, pkt
->data
, pkt
->size
);
566 p
->pict_type
= avctx
->gop_size
&& avctx
->frame_number
% avctx
->gop_size ?
567 AV_PICTURE_TYPE_P
: AV_PICTURE_TYPE_I
;
568 p
->key_frame
= p
->pict_type
== AV_PICTURE_TYPE_I
;
570 svq1_write_header(s
, p
->pict_type
);
571 for (i
= 0; i
< 3; i
++)
572 if (svq1_encode_plane(s
, i
,
574 s
->last_picture
.data
[i
],
575 s
->current_picture
.data
[i
],
576 s
->frame_width
/ (i ?
4 : 1),
577 s
->frame_height
/ (i ?
4 : 1),
578 s
->picture
.linesize
[i
],
579 s
->current_picture
.linesize
[i
]) < 0)
582 // avpriv_align_put_bits(&s->pb);
583 while (put_bits_count(&s
->pb
) & 31)
584 put_bits(&s
->pb
, 1, 0);
586 flush_put_bits(&s
->pb
);
588 pkt
->size
= put_bits_count(&s
->pb
) / 8;
589 if (p
->pict_type
== AV_PICTURE_TYPE_I
)
590 pkt
->flags
|= AV_PKT_FLAG_KEY
;
596 static av_cold
int svq1_encode_end(AVCodecContext
*avctx
)
598 SVQ1Context
*const s
= avctx
->priv_data
;
601 av_log(avctx
, AV_LOG_DEBUG
, "RD: %f\n",
602 s
->rd_total
/ (double)(avctx
->width
* avctx
->height
*
603 avctx
->frame_number
));
605 av_freep(&s
->m
.me
.scratchpad
);
606 av_freep(&s
->m
.me
.map
);
607 av_freep(&s
->m
.me
.score_map
);
608 av_freep(&s
->mb_type
);
610 av_freep(&s
->scratchbuf
);
612 for (i
= 0; i
< 3; i
++) {
613 av_freep(&s
->motion_val8
[i
]);
614 av_freep(&s
->motion_val16
[i
]);
620 AVCodec ff_svq1_encoder
= {
622 .type
= AVMEDIA_TYPE_VIDEO
,
623 .id
= AV_CODEC_ID_SVQ1
,
624 .priv_data_size
= sizeof(SVQ1Context
),
625 .init
= svq1_encode_init
,
626 .encode2
= svq1_encode_frame
,
627 .close
= svq1_encode_end
,
628 .pix_fmts
= (const enum PixelFormat
[]) { AV_PIX_FMT_YUV410P
,
630 .long_name
= NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),