vp9: split superframes in the filtering stage before actual decoding
[libav.git] / libavcodec / vp9.c
index 7989ca8..48f8afe 100644 (file)
@@ -64,7 +64,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
     f->mv               = (VP9MVRefPair*)f->mv_buf->data;
 
     if (s->segmentation.enabled && !s->segmentation.update_map &&
-        !s->keyframe && !s->intraonly)
+        !s->keyframe && !s->intraonly && !s->errorres)
         memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
 
     return 0;
@@ -1188,14 +1188,18 @@ static int update_refs(AVCodecContext *avctx)
     return 0;
 }
 
-static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
-                            int *got_frame, const uint8_t *data, int size,
-                            int can_finish_setup)
+static int vp9_decode_frame(AVCodecContext *avctx, void *output,
+                            int *got_frame, AVPacket *pkt)
 {
     VP9Context *s = avctx->priv_data;
+    AVFrame      *frame = output;
+    const uint8_t *data = pkt->data;
+    int            size = pkt->size;
     AVFrame *f;
     int ret, tile_row, tile_col, i, ref = -1, row, col;
 
+    s->setup_finished = 0;
+
     ret = decode_frame_header(avctx, data, size, &ref);
     if (ret < 0) {
         return ret;
@@ -1210,7 +1214,7 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
         if (ret < 0)
             return ret;
         *got_frame = 1;
-        return 0;
+        return pkt->size;
     }
     data += ret;
     size -= ret;
@@ -1261,7 +1265,7 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
         s->prob_ctx[s->framectxid].p = s->prob.p;
     }
     if ((s->parallelmode || !s->refreshctx) &&
-        can_finish_setup && avctx->active_thread_type & FF_THREAD_FRAME) {
+        avctx->active_thread_type & FF_THREAD_FRAME) {
         ff_thread_finish_setup(avctx);
         s->setup_finished = 1;
     }
@@ -1280,129 +1284,129 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
 
     do {
         ptrdiff_t yoff = 0, uvoff = 0;
-    s->b          = s->b_base;
-    s->block      = s->block_base;
-    s->uvblock[0] = s->uvblock_base[0];
-    s->uvblock[1] = s->uvblock_base[1];
-    s->eob        = s->eob_base;
-    s->uveob[0]   = s->uveob_base[0];
-    s->uveob[1]   = s->uveob_base[1];
-
-    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
-        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
-                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);
-
-        if (s->pass != 2) {
-        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
-            int64_t tile_size;
-
-            if (tile_col == s->tiling.tile_cols - 1 &&
-                tile_row == s->tiling.tile_rows - 1) {
-                tile_size = size;
-            } else {
-                tile_size = AV_RB32(data);
-                data     += 4;
-                size     -= 4;
-            }
-            if (tile_size > size) {
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
-            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            data += tile_size;
-            size -= tile_size;
-        }
-        }
-
-        for (row = s->tiling.tile_row_start;
-             row < s->tiling.tile_row_end;
-             row += 8, yoff += f->linesize[0] * 64,
-             uvoff += f->linesize[1] * 32) {
-            VP9Filter *lflvl = s->lflvl;
-            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
-
-            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
-                set_tile_offset(&s->tiling.tile_col_start,
-                                &s->tiling.tile_col_end,
-                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);
-
-                memset(s->left_partition_ctx, 0, 8);
-                memset(s->left_skip_ctx, 0, 8);
-                if (s->keyframe || s->intraonly)
-                    memset(s->left_mode_ctx, DC_PRED, 16);
-                else
-                    memset(s->left_mode_ctx, NEARESTMV, 8);
-                memset(s->left_y_nnz_ctx, 0, 16);
-                memset(s->left_uv_nnz_ctx, 0, 16);
-                memset(s->left_segpred_ctx, 0, 8);
-
-                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
-                for (col = s->tiling.tile_col_start;
-                     col < s->tiling.tile_col_end;
-                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
-                    // FIXME integrate with lf code (i.e. zero after each
-                    // use, similar to invtxfm coefficients, or similar)
-                    if (s->pass != 1)
-                        memset(lflvl->mask, 0, sizeof(lflvl->mask));
-
-                    if (s->pass == 2) {
-                        ret = decode_superblock_mem(avctx, row, col, lflvl,
-                                                    yoff2, uvoff2, BL_64X64);
+        s->b          = s->b_base;
+        s->block      = s->block_base;
+        s->uvblock[0] = s->uvblock_base[0];
+        s->uvblock[1] = s->uvblock_base[1];
+        s->eob        = s->eob_base;
+        s->uveob[0]   = s->uveob_base[0];
+        s->uveob[1]   = s->uveob_base[1];
+
+        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
+            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
+                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);
+
+            if (s->pass != 2) {
+                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
+                    int64_t tile_size;
+
+                    if (tile_col == s->tiling.tile_cols - 1 &&
+                        tile_row == s->tiling.tile_rows - 1) {
+                        tile_size = size;
                     } else {
-                        ret = decode_subblock(avctx, row, col, lflvl,
-                                              yoff2, uvoff2, BL_64X64);
+                        tile_size = AV_RB32(data);
+                        data     += 4;
+                        size     -= 4;
                     }
-                    if (ret < 0)
+                    if (tile_size > size) {
+                        ret = AVERROR_INVALIDDATA;
                         goto fail;
+                    }
+                    ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
+                    if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
+                        ret = AVERROR_INVALIDDATA;
+                        goto fail;
+                    }
+                    data += tile_size;
+                    size -= tile_size;
                 }
-                if (s->pass != 2)
-                    memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
             }
 
-            if (s->pass == 1)
-                continue;
-
-            // backup pre-loopfilter reconstruction data for intra
-            // prediction of next row of sb64s
-            if (row + 8 < s->rows) {
-                memcpy(s->intra_pred_data[0],
-                       f->data[0] + yoff +
-                       63 * f->linesize[0],
-                       8 * s->cols);
-                memcpy(s->intra_pred_data[1],
-                       f->data[1] + uvoff +
-                       31 * f->linesize[1],
-                       4 * s->cols);
-                memcpy(s->intra_pred_data[2],
-                       f->data[2] + uvoff +
-                       31 * f->linesize[2],
-                       4 * s->cols);
-            }
+            for (row = s->tiling.tile_row_start;
+                 row < s->tiling.tile_row_end;
+                 row += 8, yoff += f->linesize[0] * 64,
+                 uvoff += f->linesize[1] * 32) {
+                VP9Filter *lflvl = s->lflvl;
+                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
+
+                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
+                    set_tile_offset(&s->tiling.tile_col_start,
+                                    &s->tiling.tile_col_end,
+                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);
+
+                    memset(s->left_partition_ctx, 0, 8);
+                    memset(s->left_skip_ctx, 0, 8);
+                    if (s->keyframe || s->intraonly)
+                        memset(s->left_mode_ctx, DC_PRED, 16);
+                    else
+                        memset(s->left_mode_ctx, NEARESTMV, 8);
+                    memset(s->left_y_nnz_ctx, 0, 16);
+                    memset(s->left_uv_nnz_ctx, 0, 16);
+                    memset(s->left_segpred_ctx, 0, 8);
+
+                    memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
+                    for (col = s->tiling.tile_col_start;
+                         col < s->tiling.tile_col_end;
+                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
+                        // FIXME integrate with lf code (i.e. zero after each
+                        // use, similar to invtxfm coefficients, or similar)
+                        if (s->pass != 1)
+                            memset(lflvl->mask, 0, sizeof(lflvl->mask));
+
+                        if (s->pass == 2) {
+                            ret = decode_superblock_mem(avctx, row, col, lflvl,
+                                                        yoff2, uvoff2, BL_64X64);
+                        } else {
+                            ret = decode_subblock(avctx, row, col, lflvl,
+                                                  yoff2, uvoff2, BL_64X64);
+                        }
+                        if (ret < 0)
+                            goto fail;
+                    }
+                    if (s->pass != 2)
+                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
+                }
 
-            // loopfilter one row
-            if (s->filter.level) {
-                yoff2  = yoff;
-                uvoff2 = uvoff;
-                lflvl  = s->lflvl;
-                for (col = 0; col < s->cols;
-                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
-                    loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
-            }
+                if (s->pass == 1)
+                    continue;
+
+                // backup pre-loopfilter reconstruction data for intra
+                // prediction of next row of sb64s
+                if (row + 8 < s->rows) {
+                    memcpy(s->intra_pred_data[0],
+                           f->data[0] + yoff +
+                           63 * f->linesize[0],
+                           8 * s->cols);
+                    memcpy(s->intra_pred_data[1],
+                           f->data[1] + uvoff +
+                           31 * f->linesize[1],
+                           4 * s->cols);
+                    memcpy(s->intra_pred_data[2],
+                           f->data[2] + uvoff +
+                           31 * f->linesize[2],
+                           4 * s->cols);
+                }
 
-            // FIXME maybe we can make this more finegrained by running the
-            // loopfilter per-block instead of after each sbrow
-            // In fact that would also make intra pred left preparation easier?
-            ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
+                // loopfilter one row
+                if (s->filter.level) {
+                    yoff2  = yoff;
+                    uvoff2 = uvoff;
+                    lflvl  = s->lflvl;
+                    for (col = 0; col < s->cols;
+                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
+                        loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
+                }
+
+                // FIXME maybe we can make this more finegrained by running the
+                // loopfilter per-block instead of after each sbrow
+                // In fact that would also make intra pred left preparation easier?
+                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
+            }
         }
-    }
 
         if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
             ff_vp9_adapt_probs(s);
-            if (can_finish_setup && avctx->active_thread_type & FF_THREAD_FRAME) {
+            if (avctx->active_thread_type & FF_THREAD_FRAME) {
                 ff_thread_finish_setup(avctx);
                 s->setup_finished = 1;
             }
@@ -1428,60 +1432,7 @@ fail:
         *got_frame = 1;
     }
 
-    return 0;
-}
-
-static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
-                             int *got_frame, AVPacket *avpkt)
-{
-    VP9Context *s = avctx->priv_data;
-    const uint8_t *data = avpkt->data;
-    int size            = avpkt->size;
-    int marker, ret;
-
-    s->setup_finished = 0;
-
-    /* Read superframe index - this is a collection of individual frames
-     * that together lead to one visible frame */
-    marker = data[size - 1];
-    if ((marker & 0xe0) == 0xc0) {
-        int nbytes   = 1 + ((marker >> 3) & 0x3);
-        int n_frames = 1 + (marker & 0x7);
-        int idx_sz   = 2 + n_frames * nbytes;
-
-        if (size >= idx_sz && data[size - idx_sz] == marker) {
-            const uint8_t *idx = data + size + 1 - idx_sz;
-
-            while (n_frames--) {
-                unsigned sz = AV_RL32(idx);
-
-                if (nbytes < 4)
-                    sz &= (1 << (8 * nbytes)) - 1;
-                idx += nbytes;
-
-                if (sz > size) {
-                    av_log(avctx, AV_LOG_ERROR,
-                           "Superframe packet size too big: %u > %d\n",
-                           sz, size);
-                    return AVERROR_INVALIDDATA;
-                }
-
-                ret = vp9_decode_frame(avctx, frame, got_frame, data, sz,
-                                       !n_frames);
-                if (ret < 0)
-                    return ret;
-                data += sz;
-                size -= sz;
-            }
-            return avpkt->size;
-        }
-    }
-
-    /* If we get here, there was no valid superframe index, i.e. this is just
-     * one whole single frame. Decode it as such from the complete input buf. */
-    if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size, 1)) < 0)
-        return ret;
-    return size;
+    return pkt->size;
 }
 
 static av_cold int vp9_decode_free(AVCodecContext *avctx)
@@ -1584,16 +1535,17 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
 }
 
 AVCodec ff_vp9_decoder = {
-    .name           = "vp9",
-    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_VP9,
-    .priv_data_size = sizeof(VP9Context),
-    .init           = vp9_decode_init,
-    .decode         = vp9_decode_packet,
-    .flush          = vp9_decode_flush,
-    .close          = vp9_decode_free,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .name                  = "vp9",
+    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
+    .type                  = AVMEDIA_TYPE_VIDEO,
+    .id                    = AV_CODEC_ID_VP9,
+    .priv_data_size        = sizeof(VP9Context),
+    .init                  = vp9_decode_init,
+    .decode                = vp9_decode_frame,
+    .flush                 = vp9_decode_flush,
+    .close                 = vp9_decode_free,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .init_thread_copy      = vp9_decode_init,
     .update_thread_context = vp9_decode_update_thread_context,
+    .bsfs                  = "vp9_superframe_split",
 };