2 * Lagarith lossless decoder
3 * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Lagarith lossless decoder
25 * @author Nathan Caldwell
32 #include "lagarithrac.h"
/**
 * Frame type identifiers used by the Lagarith decoder.
 *
 * The numeric values are fixed; lag_decode_frame() switches on them to
 * select the pixel format and the decoding path.
 */
enum LagarithFrameType {
    FRAME_RAW           = 1,    /**< uncompressed */
    FRAME_U_RGB24       = 2,    /**< unaligned RGB24 */
    FRAME_ARITH_YUY2    = 3,    /**< arithmetic coded YUY2 */
    FRAME_ARITH_RGB24   = 4,    /**< arithmetic coded RGB24 */
    FRAME_SOLID_GRAY    = 5,    /**< solid grayscale color frame */
    FRAME_SOLID_COLOR   = 6,    /**< solid non-grayscale color frame */
    FRAME_OLD_ARITH_RGB = 7,    /**< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
    FRAME_ARITH_RGBA    = 8,    /**< arithmetic coded RGBA */
    FRAME_SOLID_RGBA    = 9,    /**< solid RGBA color frame */
    FRAME_ARITH_YV12    = 10,   /**< arithmetic coded YV12 */
    FRAME_REDUCED_RES   = 11,   /**< reduced resolution YV12 frame */
};
49 typedef struct LagarithContext
{
50 AVCodecContext
*avctx
;
52 int zeros
; /**< number of consecutive zero bytes encountered */
53 int zeros_rem
; /**< number of zero bytes remaining to output */
59 * Compute the 52bit mantissa of 1/(double)denom.
60 * This crazy format uses floats in an entropy coder and we have to match x86
61 * rounding exactly, thus ordinary floats aren't portable enough.
62 * @param denom denominator
63 * @return 52bit mantissa
66 static uint64_t softfloat_reciprocal(uint32_t denom
)
68 int shift
= av_log2(denom
- 1) + 1;
69 uint64_t ret
= (1ULL << 52) / denom
;
70 uint64_t err
= (1ULL << 52) - ret
* denom
;
74 return ret
+ err
/ denom
;
78 * (uint32_t)(x*f), where f has the given mantissa, and exponent 0
79 * Used in combination with softfloat_reciprocal computes x/(double)denom.
80 * @param x 32bit integer factor
81 * @param mantissa mantissa of f with exponent 0
82 * @return 32bit integer value (x*f)
83 * @see softfloat_reciprocal
85 static uint32_t softfloat_mul(uint32_t x
, uint64_t mantissa
)
87 uint64_t l
= x
* (mantissa
& 0xffffffff);
88 uint64_t h
= x
* (mantissa
>> 32);
91 l
+= 1 << av_log2(h
>> 21);
/**
 * Map a signed byte onto an unsigned zero-run length.
 *
 * Non-negative inputs map to 2*x, negative inputs map to -2*x - 1, so the
 * 256 possible inputs cover 0..255 exactly once. The result is what the
 * zero-run decoders store in LagarithContext.zeros_rem.
 *
 * The computation is done on unsigned values: left-shifting a negative
 * signed operand is undefined behaviour in C, and right-shifting one is
 * implementation-defined. The values produced are identical to
 * (x << 1) ^ (x >> 7) evaluated with an arithmetic right shift.
 *
 * @param x signed byte taken from the bitstream
 * @return run length in the range 0..255
 */
static uint8_t lag_calc_zero_run(int8_t x)
{
    /* Conversion to unsigned is well defined (modulo 2^N) for negative x. */
    const unsigned doubled   = (unsigned)x << 1;
    /* Equivalent of the sign-replicating (x >> 7), restricted to the low byte. */
    const unsigned sign_mask = x < 0 ? 0xFFu : 0u;

    return (uint8_t)(doubled ^ sign_mask);
}
101 static int lag_decode_prob(GetBitContext
*gb
, uint32_t *value
)
103 static const uint8_t series
[] = { 1, 2, 3, 5, 8, 13, 21 };
110 for (i
= 0; i
< 7; i
++) {
119 if (bits
< 0 || bits
> 31) {
122 } else if (bits
== 0) {
127 val
= get_bits_long(gb
, bits
);
135 static int lag_read_prob_header(lag_rac
*rac
, GetBitContext
*gb
)
137 int i
, j
, scale_factor
;
138 unsigned prob
, cumulative_target
;
139 unsigned cumul_prob
= 0;
140 unsigned scaled_cumul_prob
= 0;
143 rac
->prob
[257] = UINT_MAX
;
144 /* Read probabilities from bitstream */
145 for (i
= 1; i
< 257; i
++) {
146 if (lag_decode_prob(gb
, &rac
->prob
[i
]) < 0) {
147 av_log(rac
->avctx
, AV_LOG_ERROR
, "Invalid probability encountered.\n");
150 if ((uint64_t)cumul_prob
+ rac
->prob
[i
] > UINT_MAX
) {
151 av_log(rac
->avctx
, AV_LOG_ERROR
, "Integer overflow encountered in cumulative probability calculation.\n");
154 cumul_prob
+= rac
->prob
[i
];
156 if (lag_decode_prob(gb
, &prob
)) {
157 av_log(rac
->avctx
, AV_LOG_ERROR
, "Invalid probability run encountered.\n");
162 for (j
= 0; j
< prob
; j
++)
168 av_log(rac
->avctx
, AV_LOG_ERROR
, "All probabilities are 0!\n");
172 /* Scale probabilities so cumulative probability is an exact power of 2. */
173 scale_factor
= av_log2(cumul_prob
);
175 if (cumul_prob
& (cumul_prob
- 1)) {
176 uint64_t mul
= softfloat_reciprocal(cumul_prob
);
177 for (i
= 1; i
< 257; i
++) {
178 rac
->prob
[i
] = softfloat_mul(rac
->prob
[i
], mul
);
179 scaled_cumul_prob
+= rac
->prob
[i
];
183 cumulative_target
= 1 << scale_factor
;
185 if (scaled_cumul_prob
> cumulative_target
) {
186 av_log(rac
->avctx
, AV_LOG_ERROR
,
187 "Scaled probabilities are larger than target!\n");
191 scaled_cumul_prob
= cumulative_target
- scaled_cumul_prob
;
193 for (i
= 1; scaled_cumul_prob
; i
= (i
& 0x7f) + 1) {
198 /* Comment from reference source:
199 * if (b & 0x80 == 0) { // order of operations is 'wrong'; it has been left this way
200 * // since the compression change is negligible and fixing it
201 * // breaks backwards compatibility
202 * b =- (signed int)b;
212 rac
->scale
= scale_factor
;
214 /* Fill probability array with cumulative probability for each symbol. */
215 for (i
= 1; i
< 257; i
++)
216 rac
->prob
[i
] += rac
->prob
[i
- 1];
221 static void add_lag_median_prediction(uint8_t *dst
, uint8_t *src1
,
222 uint8_t *diff
, int w
, int *left
,
225 /* This is almost identical to add_hfyu_median_prediction in dsputil.h.
226 * However the &0xFF on the gradient predictor yields incorrect output
235 for (i
= 0; i
< w
; i
++) {
236 l
= mid_pred(l
, src1
[i
], l
+ src1
[i
] - lt
) + diff
[i
];
245 static void lag_pred_line(LagarithContext
*l
, uint8_t *buf
,
246 int width
, int stride
, int line
)
251 int i
, align_width
= (width
- 1) & ~31;
252 /* Left prediction only for first line */
253 L
= l
->dsp
.add_hfyu_left_prediction(buf
+ 1, buf
+ 1,
254 align_width
, buf
[0]);
255 for (i
= align_width
+ 1; i
< width
; i
++)
256 buf
[i
] += buf
[i
- 1];
258 /* Left pixel is actually prev_row[width] */
259 L
= buf
[width
- stride
- 1];
262 /* Second line, left predict first pixel, the rest of the line is median predicted
263 * NOTE: In the case of RGB this pixel is top predicted */
264 TL
= l
->avctx
->pix_fmt
== AV_PIX_FMT_YUV420P ? buf
[-stride
] : L
;
266 /* Top left is 2 rows back, last pixel */
267 TL
= buf
[width
- (2 * stride
) - 1];
270 add_lag_median_prediction(buf
, buf
- stride
, buf
,
275 static void lag_pred_line_yuy2(LagarithContext
*l
, uint8_t *buf
,
276 int width
, int stride
, int line
,
288 align_width
= (width
- 1) & ~31;
289 l
->dsp
.add_hfyu_left_prediction(buf
+ 1, buf
+ 1, align_width
, buf
[0]);
291 for (i
= align_width
+ 1; i
< width
; i
++)
292 buf
[i
] += buf
[i
- 1];
297 const int HEAD
= is_luma ?
4 : 2;
300 L
= buf
[width
- stride
- 1];
301 TL
= buf
[HEAD
- stride
- 1];
302 for (i
= 0; i
< HEAD
; i
++) {
306 for (; i
< width
; i
++) {
307 L
= mid_pred(L
& 0xFF, buf
[i
- stride
], (L
+ buf
[i
- stride
] - TL
) & 0xFF) + buf
[i
];
308 TL
= buf
[i
- stride
];
312 TL
= buf
[width
- (2 * stride
) - 1];
313 L
= buf
[width
- stride
- 1];
314 l
->dsp
.add_hfyu_median_prediction(buf
, buf
- stride
, buf
, width
,
319 static int lag_decode_line(LagarithContext
*l
, lag_rac
*rac
,
320 uint8_t *dst
, int width
, int stride
,
329 /* Output any zeros remaining from the previous run */
332 int count
= FFMIN(l
->zeros_rem
, width
- i
);
333 memset(dst
+ i
, 0, count
);
335 l
->zeros_rem
-= count
;
339 dst
[i
] = lag_get_rac(rac
);
348 if (l
->zeros
== esc_count
) {
349 int index
= lag_get_rac(rac
);
354 l
->zeros_rem
= lag_calc_zero_run(index
);
361 static int lag_decode_zero_run_line(LagarithContext
*l
, uint8_t *dst
,
362 const uint8_t *src
, const uint8_t *src_end
,
363 int width
, int esc_count
)
367 uint8_t zero_run
= 0;
368 const uint8_t *src_start
= src
;
369 uint8_t mask1
= -(esc_count
< 2);
370 uint8_t mask2
= -(esc_count
< 3);
371 uint8_t *end
= dst
+ (width
- 2);
375 count
= FFMIN(l
->zeros_rem
, width
- i
);
376 if (end
- dst
< count
) {
377 av_log(l
->avctx
, AV_LOG_ERROR
, "Too many zeros remaining.\n");
378 return AVERROR_INVALIDDATA
;
381 memset(dst
, 0, count
);
382 l
->zeros_rem
-= count
;
388 while (!zero_run
&& dst
+ i
< end
) {
390 if (src
+ i
>= src_end
)
391 return AVERROR_INVALIDDATA
;
393 !(src
[i
] | (src
[i
+ 1] & mask1
) | (src
[i
+ 2] & mask2
));
400 l
->zeros_rem
= lag_calc_zero_run(src
[i
]);
410 return src_start
- src
;
415 static int lag_decode_arith_plane(LagarithContext
*l
, uint8_t *dst
,
416 int width
, int height
, int stride
,
417 const uint8_t *src
, int src_size
)
423 int esc_count
= src
[0];
426 const uint8_t *src_end
= src
+ src_size
;
428 rac
.avctx
= l
->avctx
;
432 length
= width
* height
;
433 if (esc_count
&& AV_RL32(src
+ 1) < length
) {
434 length
= AV_RL32(src
+ 1);
438 init_get_bits(&gb
, src
+ offset
, src_size
* 8);
440 if (lag_read_prob_header(&rac
, &gb
) < 0)
443 ff_lag_rac_init(&rac
, &gb
, length
- stride
);
445 for (i
= 0; i
< height
; i
++)
446 read
+= lag_decode_line(l
, &rac
, dst
+ (i
* stride
), width
,
450 av_log(l
->avctx
, AV_LOG_WARNING
,
451 "Output more bytes than length (%d of %d)\n", read
,
453 } else if (esc_count
< 8) {
456 /* Zero run coding only, no range coding. */
457 for (i
= 0; i
< height
; i
++) {
458 int res
= lag_decode_zero_run_line(l
, dst
+ (i
* stride
), src
,
459 src_end
, width
, esc_count
);
465 if (src_size
< width
* height
)
466 return AVERROR_INVALIDDATA
; // buffer not big enough
467 /* Plane is stored uncompressed */
468 for (i
= 0; i
< height
; i
++) {
469 memcpy(dst
+ (i
* stride
), src
, width
);
473 } else if (esc_count
== 0xff) {
474 /* Plane is a solid run of given value */
475 for (i
= 0; i
< height
; i
++)
476 memset(dst
+ i
* stride
, src
[1], width
);
477 /* Do not apply prediction.
478 Note: memset to 0 above, setting first value to src[1]
479 and applying prediction gives the same result. */
482 av_log(l
->avctx
, AV_LOG_ERROR
,
483 "Invalid zero run escape code! (%#x)\n", esc_count
);
487 if (l
->avctx
->pix_fmt
!= AV_PIX_FMT_YUV422P
) {
488 for (i
= 0; i
< height
; i
++) {
489 lag_pred_line(l
, dst
, width
, stride
, i
);
493 for (i
= 0; i
< height
; i
++) {
494 lag_pred_line_yuy2(l
, dst
, width
, stride
, i
,
495 width
== l
->avctx
->width
);
505 * @param avctx codec context
506 * @param data output AVFrame
507 * @param got_frame output flag: 0 if no picture is returned
508 * @param avpkt input packet
509 * @return number of consumed bytes on success or negative if decode fails
511 static int lag_decode_frame(AVCodecContext
*avctx
,
512 void *data
, int *got_frame
, AVPacket
*avpkt
)
514 const uint8_t *buf
= avpkt
->data
;
515 int buf_size
= avpkt
->size
;
516 LagarithContext
*l
= avctx
->priv_data
;
517 ThreadFrame frame
= { .f
= data
};
518 AVFrame
*const p
= data
;
519 uint8_t frametype
= 0;
520 uint32_t offset_gu
= 0, offset_bv
= 0, offset_ry
= 9;
522 uint8_t *srcs
[4], *dst
;
523 int i
, j
, planes
= 3;
529 offset_gu
= AV_RL32(buf
+ 1);
530 offset_bv
= AV_RL32(buf
+ 5);
533 case FRAME_SOLID_RGBA
:
534 avctx
->pix_fmt
= AV_PIX_FMT_RGB32
;
536 if (ff_thread_get_buffer(avctx
, &frame
, 0) < 0) {
537 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
542 for (j
= 0; j
< avctx
->height
; j
++) {
543 for (i
= 0; i
< avctx
->width
; i
++)
544 AV_WN32(dst
+ i
* 4, offset_gu
);
545 dst
+= p
->linesize
[0];
548 case FRAME_ARITH_RGBA
:
549 avctx
->pix_fmt
= AV_PIX_FMT_RGB32
;
552 offs
[3] = AV_RL32(buf
+ 9);
553 case FRAME_ARITH_RGB24
:
555 if (frametype
== FRAME_ARITH_RGB24
|| frametype
== FRAME_U_RGB24
)
556 avctx
->pix_fmt
= AV_PIX_FMT_RGB24
;
558 if (ff_thread_get_buffer(avctx
, &frame
, 0) < 0) {
559 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
567 if (!l
->rgb_planes
) {
568 l
->rgb_stride
= FFALIGN(avctx
->width
, 16);
569 l
->rgb_planes
= av_malloc(l
->rgb_stride
* avctx
->height
* planes
+ 1);
570 if (!l
->rgb_planes
) {
571 av_log(avctx
, AV_LOG_ERROR
, "cannot allocate temporary buffer\n");
572 return AVERROR(ENOMEM
);
575 for (i
= 0; i
< planes
; i
++)
576 srcs
[i
] = l
->rgb_planes
+ (i
+ 1) * l
->rgb_stride
* avctx
->height
- l
->rgb_stride
;
577 if (offset_ry
>= buf_size
||
578 offset_gu
>= buf_size
||
579 offset_bv
>= buf_size
||
580 (planes
== 4 && offs
[3] >= buf_size
)) {
581 av_log(avctx
, AV_LOG_ERROR
,
582 "Invalid frame offsets\n");
583 return AVERROR_INVALIDDATA
;
585 for (i
= 0; i
< planes
; i
++)
586 lag_decode_arith_plane(l
, srcs
[i
],
587 avctx
->width
, avctx
->height
,
588 -l
->rgb_stride
, buf
+ offs
[i
],
591 for (i
= 0; i
< planes
; i
++)
592 srcs
[i
] = l
->rgb_planes
+ i
* l
->rgb_stride
* avctx
->height
;
593 for (j
= 0; j
< avctx
->height
; j
++) {
594 for (i
= 0; i
< avctx
->width
; i
++) {
601 if (frametype
== FRAME_ARITH_RGBA
) {
603 AV_WN32(dst
+ i
* 4, MKBETAG(a
, r
, g
, b
));
610 dst
+= p
->linesize
[0];
611 for (i
= 0; i
< planes
; i
++)
612 srcs
[i
] += l
->rgb_stride
;
615 case FRAME_ARITH_YUY2
:
616 avctx
->pix_fmt
= AV_PIX_FMT_YUV422P
;
618 if (ff_thread_get_buffer(avctx
, &frame
, 0) < 0) {
619 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
623 if (offset_ry
>= buf_size
||
624 offset_gu
>= buf_size
||
625 offset_bv
>= buf_size
) {
626 av_log(avctx
, AV_LOG_ERROR
,
627 "Invalid frame offsets\n");
628 return AVERROR_INVALIDDATA
;
631 lag_decode_arith_plane(l
, p
->data
[0], avctx
->width
, avctx
->height
,
632 p
->linesize
[0], buf
+ offset_ry
,
633 buf_size
- offset_ry
);
634 lag_decode_arith_plane(l
, p
->data
[1], avctx
->width
/ 2,
635 avctx
->height
, p
->linesize
[1],
636 buf
+ offset_gu
, buf_size
- offset_gu
);
637 lag_decode_arith_plane(l
, p
->data
[2], avctx
->width
/ 2,
638 avctx
->height
, p
->linesize
[2],
639 buf
+ offset_bv
, buf_size
- offset_bv
);
641 case FRAME_ARITH_YV12
:
642 avctx
->pix_fmt
= AV_PIX_FMT_YUV420P
;
644 if (ff_thread_get_buffer(avctx
, &frame
, 0) < 0) {
645 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
649 if (offset_ry
>= buf_size
||
650 offset_gu
>= buf_size
||
651 offset_bv
>= buf_size
) {
652 av_log(avctx
, AV_LOG_ERROR
,
653 "Invalid frame offsets\n");
654 return AVERROR_INVALIDDATA
;
657 lag_decode_arith_plane(l
, p
->data
[0], avctx
->width
, avctx
->height
,
658 p
->linesize
[0], buf
+ offset_ry
,
659 buf_size
- offset_ry
);
660 lag_decode_arith_plane(l
, p
->data
[2], avctx
->width
/ 2,
661 avctx
->height
/ 2, p
->linesize
[2],
662 buf
+ offset_gu
, buf_size
- offset_gu
);
663 lag_decode_arith_plane(l
, p
->data
[1], avctx
->width
/ 2,
664 avctx
->height
/ 2, p
->linesize
[1],
665 buf
+ offset_bv
, buf_size
- offset_bv
);
668 av_log(avctx
, AV_LOG_ERROR
,
669 "Unsupported Lagarith frame type: %#x\n", frametype
);
678 static av_cold
int lag_decode_init(AVCodecContext
*avctx
)
680 LagarithContext
*l
= avctx
->priv_data
;
683 ff_dsputil_init(&l
->dsp
, avctx
);
688 static av_cold
int lag_decode_end(AVCodecContext
*avctx
)
690 LagarithContext
*l
= avctx
->priv_data
;
692 av_freep(&l
->rgb_planes
);
697 AVCodec ff_lagarith_decoder
= {
699 .long_name
= NULL_IF_CONFIG_SMALL("Lagarith lossless"),
700 .type
= AVMEDIA_TYPE_VIDEO
,
701 .id
= AV_CODEC_ID_LAGARITH
,
702 .priv_data_size
= sizeof(LagarithContext
),
703 .init
= lag_decode_init
,
704 .close
= lag_decode_end
,
705 .decode
= lag_decode_frame
,
706 .capabilities
= CODEC_CAP_DR1
| CODEC_CAP_FRAME_THREADS
,