jpeg2000: Proper cleanup on failure in decode_frame()
[libav.git] / libavcodec / jpeg2000dec.c
index 5481a83..15dfc2b 100644 (file)
@@ -72,7 +72,7 @@ typedef struct Jpeg2000DecoderContext {
     int             precision;
     int             ncomponents;
     int             tile_width, tile_height;
-    int             numXtiles, numYtiles;
+    unsigned        numXtiles, numYtiles;
     int             maxtilelen;
 
     Jpeg2000CodingStyle codsty[4];
@@ -84,8 +84,7 @@ typedef struct Jpeg2000DecoderContext {
     Jpeg2000Tile    *tile;
 
     /*options parameters*/
-    int16_t         lowres;
-    int16_t         reduction_factor;
+    int             reduction_factor;
 } Jpeg2000DecoderContext;
 
 /* get_bits functions for JPEG2000 packet bitstream
@@ -154,6 +153,7 @@ static int tag_tree_decode(Jpeg2000DecoderContext *s, Jpeg2000TgtNode *node,
 static int get_siz(Jpeg2000DecoderContext *s)
 {
     int i;
+    int ncomponents;
 
     if (bytestream2_get_bytes_left(&s->g) < 36)
         return AVERROR_INVALIDDATA;
@@ -167,7 +167,28 @@ static int get_siz(Jpeg2000DecoderContext *s)
     s->tile_height    = bytestream2_get_be32u(&s->g); // YTSiz
     s->tile_offset_x  = bytestream2_get_be32u(&s->g); // XT0Siz
     s->tile_offset_y  = bytestream2_get_be32u(&s->g); // YT0Siz
-    s->ncomponents       = bytestream2_get_be16u(&s->g); // CSiz
+    ncomponents       = bytestream2_get_be16u(&s->g); // CSiz
+
+    if (ncomponents <= 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid number of components: %d\n",
+               s->ncomponents);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (ncomponents > 3) {
+        avpriv_request_sample(s->avctx, "Support for %d components",
+                              s->ncomponents);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    s->ncomponents = ncomponents;
+
+    if (s->tile_width <= 0 || s->tile_height <= 0 ||
+        s->tile_width > s->width || s->tile_height > s->height) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid tile dimension %dx%d.\n",
+               s->tile_width, s->tile_height);
+        return AVERROR_INVALIDDATA;
+    }
 
     if (bytestream2_get_bytes_left(&s->g) < 3 * s->ncomponents)
         return AVERROR_INVALIDDATA;
@@ -176,17 +197,29 @@ static int get_siz(Jpeg2000DecoderContext *s)
         uint8_t x    = bytestream2_get_byteu(&s->g);
         s->cbps[i]   = (x & 0x7f) + 1;
         s->precision = FFMAX(s->cbps[i], s->precision);
-        s->sgnd[i]   = (x & 0x80) == 1;
+        s->sgnd[i]   = !!(x & 0x80);
         s->cdx[i]    = bytestream2_get_byteu(&s->g);
         s->cdy[i]    = bytestream2_get_byteu(&s->g);
+
+        if (s->cdx[i] != 1 || s->cdy[i] != 1) {
+            avpriv_request_sample(s->avctx,
+                                  "CDxy values %d %d for component %d",
+                                  s->cdx[i], s->cdy[i], i);
+            if (!s->cdx[i] || !s->cdy[i])
+                return AVERROR_INVALIDDATA;
+            else
+                return AVERROR_PATCHWELCOME;
+        }
     }
 
     s->numXtiles = ff_jpeg2000_ceildiv(s->width  - s->tile_offset_x, s->tile_width);
     s->numYtiles = ff_jpeg2000_ceildiv(s->height - s->tile_offset_y, s->tile_height);
 
-    s->tile = av_mallocz(s->numXtiles * s->numYtiles * sizeof(*s->tile));
-    if (!s->tile)
+    s->tile = av_mallocz_array(s->numXtiles * s->numYtiles, sizeof(*s->tile));
+    if (!s->tile) {
+        s->numXtiles = s->numYtiles = 0;
         return AVERROR(ENOMEM);
+    }
 
     for (i = 0; i < s->numXtiles * s->numYtiles; i++) {
         Jpeg2000Tile *tile = s->tile + i;
@@ -202,37 +235,35 @@ static int get_siz(Jpeg2000DecoderContext *s)
     s->avctx->height = ff_jpeg2000_ceildivpow2(s->height - s->image_offset_y,
                                                s->reduction_factor);
 
-    switch (s->avctx->profile) {
-    case FF_PROFILE_JPEG2000_DCINEMA_2K:
-    case FF_PROFILE_JPEG2000_DCINEMA_4K:
-        /* XYZ color-space for digital cinema profiles */
-        s->avctx->pix_fmt = AV_PIX_FMT_XYZ12;
+    switch (s->ncomponents) {
+    case 1:
+        if (s->precision > 8)
+            s->avctx->pix_fmt = AV_PIX_FMT_GRAY16;
+        else
+            s->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
         break;
-    default:
-        /* For other profiles selects color-space according number of
-         * components and bit depth precision. */
-        switch (s->ncomponents) {
-        case 1:
-            if (s->precision > 8)
-                s->avctx->pix_fmt = AV_PIX_FMT_GRAY16;
-            else
-                s->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+    case 3:
+        switch (s->avctx->profile) {
+        case FF_PROFILE_JPEG2000_DCINEMA_2K:
+        case FF_PROFILE_JPEG2000_DCINEMA_4K:
+            /* XYZ color-space for digital cinema profiles */
+            s->avctx->pix_fmt = AV_PIX_FMT_XYZ12;
             break;
-        case 3:
+        default:
             if (s->precision > 8)
                 s->avctx->pix_fmt = AV_PIX_FMT_RGB48;
             else
                 s->avctx->pix_fmt = AV_PIX_FMT_RGB24;
             break;
-        case 4:
-            s->avctx->pix_fmt = AV_PIX_FMT_BGRA;
-            break;
-        default:
-            /* pixel format can not be identified */
-            s->avctx->pix_fmt = AV_PIX_FMT_NONE;
-            break;
         }
         break;
+    case 4:
+        s->avctx->pix_fmt = AV_PIX_FMT_RGBA;
+        break;
+    default:
+        /* pixel format can not be identified */
+        s->avctx->pix_fmt = AV_PIX_FMT_NONE;
+        break;
     }
     return 0;
 }
@@ -284,6 +315,9 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
             c->log2_prec_widths[i]  =  byte       & 0x0F;    // precinct PPx
             c->log2_prec_heights[i] = (byte >> 4) & 0x0F;    // precinct PPy
         }
+    } else {
+        memset(c->log2_prec_widths , 15, sizeof(c->log2_prec_widths ));
+        memset(c->log2_prec_heights, 15, sizeof(c->log2_prec_heights));
     }
     return 0;
 }
@@ -293,14 +327,11 @@ static int get_cod(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
                    uint8_t *properties)
 {
     Jpeg2000CodingStyle tmp;
-    int compno;
+    int compno, ret;
 
     if (bytestream2_get_bytes_left(&s->g) < 5)
         return AVERROR_INVALIDDATA;
 
-    tmp.log2_prec_width  =
-    tmp.log2_prec_height = 15;
-
     tmp.csty = bytestream2_get_byteu(&s->g);
 
     // get progression order
@@ -309,7 +340,16 @@ static int get_cod(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
     tmp.nlayers    = bytestream2_get_be16u(&s->g);
     tmp.mct        = bytestream2_get_byteu(&s->g); // multiple component transformation
 
-    get_cox(s, &tmp);
+    if (tmp.mct && s->ncomponents < 3) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "MCT %d with too few components (%d)\n",
+               tmp.mct, s->ncomponents);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((ret = get_cox(s, &tmp)) < 0)
+        return ret;
+
     for (compno = 0; compno < s->ncomponents; compno++)
         if (!(properties[compno] & HAD_COC))
             memcpy(c + compno, &tmp, sizeof(tmp));
@@ -321,16 +361,25 @@ static int get_cod(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
 static int get_coc(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
                    uint8_t *properties)
 {
-    int compno;
+    int compno, ret;
 
     if (bytestream2_get_bytes_left(&s->g) < 2)
         return AVERROR_INVALIDDATA;
 
     compno = bytestream2_get_byteu(&s->g);
 
+    if (compno >= s->ncomponents) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Invalid compno %d. There are %d components in the image.\n",
+               compno, s->ncomponents);
+        return AVERROR_INVALIDDATA;
+    }
+
     c      += compno;
     c->csty = bytestream2_get_byteu(&s->g);
-    get_cox(s, c);
+
+    if ((ret = get_cox(s, c)) < 0)
+        return ret;
 
     properties[compno] |= HAD_COC;
     return 0;
@@ -406,7 +455,15 @@ static int get_qcc(Jpeg2000DecoderContext *s, int n, Jpeg2000QuantStyle *q,
     if (bytestream2_get_bytes_left(&s->g) < 1)
         return AVERROR_INVALIDDATA;
 
-    compno              = bytestream2_get_byteu(&s->g);
+    compno = bytestream2_get_byteu(&s->g);
+
+    if (compno >= s->ncomponents) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Invalid compno %d. There are %d components in the image.\n",
+               compno, s->ncomponents);
+        return AVERROR_INVALIDDATA;
+    }
+
     properties[compno] |= HAD_QCC;
     return get_qcx(s, n - 1, q + compno);
 }
@@ -423,6 +480,9 @@ static int get_sot(Jpeg2000DecoderContext *s, int n)
         return AVERROR_INVALIDDATA;
 
     Isot = bytestream2_get_be16u(&s->g);        // Isot
+    if (Isot >= s->numXtiles * s->numYtiles)
+        return AVERROR_INVALIDDATA;
+
     if (Isot) {
         avpriv_request_sample(s->avctx, "Support for more than one tile");
         return AVERROR_PATCHWELCOME;
@@ -433,6 +493,16 @@ static int get_sot(Jpeg2000DecoderContext *s, int n)
     /* Read TNSot but not used */
     bytestream2_get_byteu(&s->g);               // TNsot
 
+    if (Psot > bytestream2_get_bytes_left(&s->g) + n + 2) {
+        av_log(s->avctx, AV_LOG_ERROR, "Psot %d too big\n", Psot);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (TPsot >= FF_ARRAY_ELEMS(s->tile[Isot].tile_part)) {
+        avpriv_request_sample(s->avctx, "Support for %d components", TPsot);
+        return AVERROR_PATCHWELCOME;
+    }
+
     tp             = s->tile[s->curtileno].tile_part + TPsot;
     tp->tile_index = Isot;
     tp->tp_len     = Psot;
@@ -503,8 +573,6 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
     int tilex = tileno % s->numXtiles;
     int tiley = tileno / s->numXtiles;
     Jpeg2000Tile *tile = s->tile + tileno;
-    Jpeg2000CodingStyle *codsty;
-    Jpeg2000QuantStyle  *qntsty;
 
     if (!tile->comp)
         return AVERROR(ENOMEM);
@@ -512,26 +580,24 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
     /* copy codsty, qnsty to tile. TODO: Is it the best way?
      * codsty, qnsty is an array of 4 structs Jpeg2000CodingStyle
      * and Jpeg2000QuantStyle */
-    memcpy(tile->codsty, s->codsty, s->ncomponents * sizeof(*codsty));
-    memcpy(tile->qntsty, s->qntsty, s->ncomponents * sizeof(*qntsty));
+    memcpy(tile->codsty, s->codsty, s->ncomponents * sizeof(*tile->codsty));
+    memcpy(tile->qntsty, s->qntsty, s->ncomponents * sizeof(*tile->qntsty));
 
     for (compno = 0; compno < s->ncomponents; compno++) {
         Jpeg2000Component *comp = tile->comp + compno;
+        Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+        Jpeg2000QuantStyle  *qntsty = tile->qntsty + compno;
         int ret; // global bandno
-        codsty = tile->codsty + compno;
-        qntsty = tile->qntsty + compno;
 
         comp->coord_o[0][0] = FFMAX(tilex       * s->tile_width  + s->tile_offset_x, s->image_offset_x);
         comp->coord_o[0][1] = FFMIN((tilex + 1) * s->tile_width  + s->tile_offset_x, s->width);
         comp->coord_o[1][0] = FFMAX(tiley       * s->tile_height + s->tile_offset_y, s->image_offset_y);
         comp->coord_o[1][1] = FFMIN((tiley + 1) * s->tile_height + s->tile_offset_y, s->height);
 
-        // FIXME: add a dcinema profile check ?
-        // value is guaranteed by profile (orig=0, 1 tile)
-        comp->coord[0][0] = 0;
-        comp->coord[0][1] = s->avctx->width;
-        comp->coord[1][0] = 0;
-        comp->coord[1][1] = s->avctx->height;
+        comp->coord[0][0] = ff_jpeg2000_ceildivpow2(comp->coord_o[0][0], s->reduction_factor);
+        comp->coord[0][1] = ff_jpeg2000_ceildivpow2(comp->coord_o[0][1], s->reduction_factor);
+        comp->coord[1][0] = ff_jpeg2000_ceildivpow2(comp->coord_o[1][0], s->reduction_factor);
+        comp->coord[1][1] = ff_jpeg2000_ceildivpow2(comp->coord_o[1][1], s->reduction_factor);
 
         if (ret = ff_jpeg2000_init_component(comp, codsty, qntsty,
                                              s->cbps[compno], s->cdx[compno],
@@ -588,8 +654,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
         if (band->coord[0][0] == band->coord[0][1] ||
             band->coord[1][0] == band->coord[1][1])
             continue;
-        prec->yi0 = 0;
-        prec->xi0 = 0;
         nb_code_blocks =  prec->nb_codeblocks_height *
                           prec->nb_codeblocks_width;
         for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
@@ -605,10 +669,16 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
             else if (incl < 0)
                 return incl;
 
-            if (!cblk->npasses)
-                cblk->nonzerobits = expn[bandno] + numgbits - 1 -
-                                    tag_tree_decode(s, prec->zerobits + cblkno,
-                                                    100);
+            if (!cblk->npasses) {
+                int v = expn[bandno] + numgbits - 1 -
+                        tag_tree_decode(s, prec->zerobits + cblkno, 100);
+                if (v < 0) {
+                    av_log(s->avctx, AV_LOG_ERROR,
+                           "nonzerobits %d invalid\n", v);
+                    return AVERROR_INVALIDDATA;
+                }
+                cblk->nonzerobits = v;
+            }
             if ((newpasses = getnpasses(s)) < 0)
                 return newpasses;
             if ((llen = getlblockinc(s)) < 0)
@@ -616,6 +686,12 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
             cblk->lblock += llen;
             if ((ret = get_bits(s, av_log2(newpasses) + cblk->lblock)) < 0)
                 return ret;
+            if (ret > sizeof(cblk->data)) {
+                avpriv_request_sample(s->avctx,
+                                      "Block with lengthinc greater than %zu",
+                                      sizeof(cblk->data));
+                return AVERROR_PATCHWELCOME;
+            }
             cblk->lengthinc = ret;
             cblk->npasses  += newpasses;
         }
@@ -864,12 +940,13 @@ static int decode_cblk(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty,
     int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y;
 
     for (y = 0; y < height; y++)
-        memset(t1->data[y], 0, width * sizeof(width));
+        memset(t1->data[y], 0, width * sizeof(**t1->data));
+
     /* If code-block contains no compressed data: nothing to do. */
     if (!cblk->length)
         return 0;
     for (y = 0; y < height + 2; y++)
-        memset(t1->flags[y], 0, (width + 2) * sizeof(width));
+        memset(t1->flags[y], 0, (width + 2) * sizeof(**t1->flags));
 
     ff_mqc_initdec(&t1->mqc, cblk->data);
     cblk->data[cblk->length]     = 0xff;
@@ -910,13 +987,12 @@ static void dequantization_float(int x, int y, Jpeg2000Cblk *cblk,
                                  Jpeg2000T1Context *t1, Jpeg2000Band *band)
 {
     int i, j, idx;
-    float *datap = &comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x];
+    float *datap = &comp->f_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x];
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j)
         for (i = 0; i < (cblk->coord[0][1] - cblk->coord[0][0]); ++i) {
             idx        = (comp->coord[0][1] - comp->coord[0][0]) * j + i;
-            datap[idx] = (float)(t1->data[j][i]) * ((float)band->stepsize);
+            datap[idx] = (float)(t1->data[j][i]) * band->f_stepsize;
         }
-    return;
 }
 
 /* Integer dequantization of a codeblock.*/
@@ -925,15 +1001,13 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk,
                                Jpeg2000T1Context *t1, Jpeg2000Band *band)
 {
     int i, j, idx;
-    int32_t *datap =
-        (int32_t *) &comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x];
+    int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x];
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j)
         for (i = 0; i < (cblk->coord[0][1] - cblk->coord[0][0]); ++i) {
             idx        = (comp->coord[0][1] - comp->coord[0][0]) * j + i;
             datap[idx] =
-                ((int32_t)(t1->data[j][i]) * ((int32_t)band->stepsize) + (1 << 15)) >> 16;
+                ((int32_t)(t1->data[j][i]) * band->i_stepsize + (1 << 15)) >> 16;
         }
-    return;
 }
 
 /* Inverse ICT parameters in float and integer.
@@ -951,18 +1025,17 @@ static const int   i_ict_params[4] = {
     116130
 };
 
-static int mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
+static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
 {
     int i, csize = 1;
-    int ret = 0;
     int32_t *src[3],  i0,  i1,  i2;
     float   *srcf[3], i0f, i1f, i2f;
 
     for (i = 0; i < 3; i++)
         if (tile->codsty[0].transform == FF_DWT97)
-            srcf[i] = tile->comp[i].data;
+            srcf[i] = tile->comp[i].f_data;
         else
-            src[i] = (int32_t *)tile->comp[i].data;
+            src [i] = tile->comp[i].i_data;
 
     for (i = 0; i < 2; i++)
         csize *= tile->comp[0].coord[i][1] - tile->comp[0].coord[i][0];
@@ -1000,7 +1073,6 @@ static int mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
         }
         break;
     }
-    return ret;
 }
 
 static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
@@ -1040,42 +1112,31 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                                     cblk->coord[1][1] - cblk->coord[1][0],
                                     bandpos);
 
-                        /* Manage band offsets */
                         x = cblk->coord[0][0];
                         y = cblk->coord[1][0];
-                        if ((reslevelno > 0) && ((bandno + 1) & 1)) {
-                            Jpeg2000ResLevel *pres = comp->reslevel + (reslevelno - 1);
-                            x += pres->coord[0][1] - pres->coord[0][0];
-                        }
-                        if ((reslevelno > 0) && ((bandno + 1) & 2)) {
-                            Jpeg2000ResLevel *pres = comp->reslevel + (reslevelno - 1);
-                            y += pres->coord[1][1] - pres->coord[1][0];
-                        }
 
-                        if (s->avctx->flags & CODEC_FLAG_BITEXACT)
-                            dequantization_int(x, y, cblk, comp, &t1, band);
-                        else
+                        if (codsty->transform == FF_DWT97)
                             dequantization_float(x, y, cblk, comp, &t1, band);
+                        else
+                            dequantization_int(x, y, cblk, comp, &t1, band);
                    } /* end cblk */
                 } /*end prec */
             } /* end band */
         } /* end reslevel */
 
         /* inverse DWT */
-        ff_dwt_decode(&comp->dwt, comp->data);
+        ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
     } /*end comp */
 
     /* inverse MCT transformation */
     if (tile->codsty[0].mct)
         mct_decode(s, tile);
 
-    if (s->avctx->pix_fmt == AV_PIX_FMT_BGRA) // RGBA -> BGRA
-        FFSWAP(float *, tile->comp[0].data, tile->comp[2].data);
-
     if (s->precision <= 8) {
         for (compno = 0; compno < s->ncomponents; compno++) {
             Jpeg2000Component *comp = tile->comp + compno;
-            int32_t *datap = (int32_t *)comp->data;
+            float *datap = comp->f_data;
+            int32_t *i_datap = comp->i_data;
             y    = tile->comp[compno].coord[1][0] - s->image_offset_y;
             line = picture->data[0] + y * picture->linesize[0];
             for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]) {
@@ -1085,12 +1146,16 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                 dst = line + x * s->ncomponents + compno;
 
                 for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s->cdx[compno]) {
-                    *datap += 1 << (s->cbps[compno] - 1);
-                    if (*datap < 0)
-                        *datap = 0;
-                    else if (*datap >= (1 << s->cbps[compno]))
-                        *datap = (1 << s->cbps[compno]) - 1;
-                    *dst = *datap++;
+                     int val;
+                    /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */
+                    if (tile->codsty->transform == FF_DWT97)
+                        val = lrintf(*datap) + (1 << (s->cbps[compno] - 1));
+                    else
+                        val = *i_datap + (1 << (s->cbps[compno] - 1));
+                    val = av_clip(val, 0, (1 << s->cbps[compno]) - 1);
+                    *dst = val << (8 - s->cbps[compno]);
+                    datap++;
+                    i_datap++;
                     dst += s->ncomponents;
                 }
                 line += picture->linesize[0];
@@ -1099,8 +1164,8 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
     } else {
         for (compno = 0; compno < s->ncomponents; compno++) {
             Jpeg2000Component *comp = tile->comp + compno;
-            float *datap = comp->data;
-            int32_t *i_datap = (int32_t *) comp->data;
+            float *datap = comp->f_data;
+            int32_t *i_datap = comp->i_data;
             uint16_t *linel;
 
             y     = tile->comp[compno].coord[1][0] - s->image_offset_y;
@@ -1110,15 +1175,15 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                 x   = tile->comp[compno].coord[0][0] - s->image_offset_x;
                 dst = linel + (x * s->ncomponents + compno);
                 for (; x < s->avctx->width; x += s->cdx[compno]) {
-                    int16_t val;
+                    int val;
                     /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */
-                    if (s->avctx->flags & CODEC_FLAG_BITEXACT)
-                        val = *i_datap + (1 << (s->cbps[compno] - 1));
-                    else
+                    if (tile->codsty->transform == FF_DWT97)
                         val = lrintf(*datap) + (1 << (s->cbps[compno] - 1));
+                    else
+                        val = *i_datap + (1 << (s->cbps[compno] - 1));
                     val = av_clip(val, 0, (1 << s->cbps[compno]) - 1);
                     /* align 12 bit values in little-endian mode */
-                    *dst = val << 4;
+                    *dst = val << (16 - s->cbps[compno]);
                     datap++;
                     i_datap++;
                     dst += s->ncomponents;
@@ -1164,12 +1229,29 @@ static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s)
         marker = bytestream2_get_be16u(&s->g);
         oldpos = bytestream2_tell(&s->g);
 
+        if (marker == JPEG2000_SOD) {
+            Jpeg2000Tile *tile;
+            Jpeg2000TilePart *tp;
+
+            if (s->curtileno < 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "Missing SOT\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            tile = s->tile + s->curtileno;
+            tp = tile->tile_part + tile->tp_idx;
+            bytestream2_init(&tp->tpg, s->g.buffer, tp->tp_end - s->g.buffer);
+            bytestream2_skip(&s->g, tp->tp_end - s->g.buffer);
+
+            continue;
+        }
         if (marker == JPEG2000_EOC)
             break;
 
-        if (bytestream2_get_bytes_left(&s->g) < 2)
-            return AVERROR_INVALIDDATA;
         len = bytestream2_get_be16u(&s->g);
+        if (len < 2 || bytestream2_get_bytes_left(&s->g) < len - 2)
+            return AVERROR_INVALIDDATA;
+
         switch (marker) {
         case JPEG2000_SIZ:
             ret = get_siz(s);
@@ -1264,11 +1346,10 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
     s->curtileno = 0; // TODO: only one tile in DCI JP2K. to implement for more tiles
 
-    // reduction factor, i.e number of resolution levels to skip
-    s->reduction_factor = s->lowres;
-
-    if (bytestream2_get_bytes_left(&s->g) < 2)
-        return AVERROR_INVALIDDATA;
+    if (bytestream2_get_bytes_left(&s->g) < 2) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
 
     // check if the image is in jp2 format
     if (bytestream2_get_bytes_left(&s->g) >= 12 &&
@@ -1278,17 +1359,17 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
         if (!jp2_find_codestream(s)) {
             av_log(avctx, AV_LOG_ERROR,
                    "Could not find Jpeg2000 codestream atom.\n");
-            return AVERROR_INVALIDDATA;
+            ret = AVERROR_INVALIDDATA;
+            goto end;
         }
     } else {
         bytestream2_seek(&s->g, 0, SEEK_SET);
-        if (bytestream2_peek_be16(&s->g) != JPEG2000_SOC)
-            bytestream2_skip(&s->g, 8);
     }
 
     if (bytestream2_get_be16u(&s->g) != JPEG2000_SOC) {
         av_log(avctx, AV_LOG_ERROR, "SOC marker not present\n");
-        return AVERROR_INVALIDDATA;
+        ret = AVERROR_INVALIDDATA;
+        goto end;
     }
     if (ret = jpeg2000_read_main_headers(s))
         goto end;
@@ -1307,6 +1388,8 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
         if (ret = jpeg2000_decode_tile(s, s->tile + tileno, picture))
             goto end;
 
+    jpeg2000_dec_cleanup(s);
+
     *got_frame = 1;
 
     return bytestream2_tell(&s->g);
@@ -1326,7 +1409,7 @@ static void jpeg2000_init_static_data(AVCodec *codec)
 
 static const AVOption options[] = {
     { "lowres",  "Lower the decoding resolution by a power of two",
-        OFFSET(lowres), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, JPEG2000_MAX_RESLEVELS - 1, VD },
+        OFFSET(reduction_factor), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, JPEG2000_MAX_RESLEVELS - 1, VD },
     { NULL },
 };
 
@@ -1356,8 +1439,5 @@ AVCodec ff_jpeg2000_decoder = {
     .init_static_data = jpeg2000_init_static_data,
     .decode           = jpeg2000_decode_frame,
     .priv_class       = &class,
-    .pix_fmts         = (enum AVPixelFormat[]) { AV_PIX_FMT_XYZ12,
-                                                 AV_PIX_FMT_GRAY8,
-                                                 -1 },
     .profiles         = NULL_IF_CONFIG_SMALL(profiles)
 };