Move some branches outside looped code. Should improve the generated asm (and
author: Sebastian Vater <cdgs.basty@googlemail.com>
Mon, 26 Apr 2010 22:38:41 +0000 (22:38 +0000)
committer: Ronald S. Bultje <rsbultje@gmail.com>
Mon, 26 Apr 2010 22:38:41 +0000 (22:38 +0000)
Move some branches outside looped code. Should improve the generated asm (and
thus performance) slightly.

Patch by Sebastian Vater <cdgs.basty googlemail com>.

Originally committed as revision 22975 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/iff.c

index b5ee2e6..e628c5e 100644 (file)
@@ -140,16 +140,23 @@ static int decode_frame_ilbm(AVCodecContext *avctx,
         return -1;
     }
 
         return -1;
     }
 
+    if (avctx->pix_fmt == PIX_FMT_PAL8) {
     for(y = 0; y < avctx->height; y++ ) {
         uint8_t *row = &s->frame.data[0][ y*s->frame.linesize[0] ];
     for(y = 0; y < avctx->height; y++ ) {
         uint8_t *row = &s->frame.data[0][ y*s->frame.linesize[0] ];
-        memset(row, 0, avctx->pix_fmt == PIX_FMT_PAL8 ? avctx->width : (avctx->width * 4));
+        memset(row, 0, avctx->width);
         for (plane = 0; plane < avctx->bits_per_coded_sample && buf < buf_end; plane++) {
         for (plane = 0; plane < avctx->bits_per_coded_sample && buf < buf_end; plane++) {
-            if (avctx->pix_fmt == PIX_FMT_PAL8) {
                 decodeplane8(row, buf, FFMIN(s->planesize, buf_end - buf), avctx->bits_per_coded_sample, plane);
                 decodeplane8(row, buf, FFMIN(s->planesize, buf_end - buf), avctx->bits_per_coded_sample, plane);
+                buf += s->planesize;
+            }
+        }
             } else { // PIX_FMT_BGR32
             } else { // PIX_FMT_BGR32
+        for(y = 0; y < avctx->height; y++ ) {
+            uint8_t *row = &s->frame.data[0][y*s->frame.linesize[0]];
+            memset(row, 0, avctx->width << 2);
+            for (plane = 0; plane < avctx->bits_per_coded_sample && buf < buf_end; plane++) {
                 decodeplane32((uint32_t *) row, buf, FFMIN(s->planesize, buf_end - buf), avctx->bits_per_coded_sample, plane);
                 decodeplane32((uint32_t *) row, buf, FFMIN(s->planesize, buf_end - buf), avctx->bits_per_coded_sample, plane);
-            }
             buf += s->planesize;
             buf += s->planesize;
+            }
         }
     }
 
         }
     }
 
@@ -173,10 +180,11 @@ static int decode_frame_byterun1(AVCodecContext *avctx,
         return -1;
     }
 
         return -1;
     }
 
+    if (avctx->codec_tag == MKTAG('I','L','B','M')) { //interleaved
+        if (avctx->pix_fmt == PIX_FMT_PAL8) {
     for(y = 0; y < avctx->height ; y++ ) {
         uint8_t *row = &s->frame.data[0][ y*s->frame.linesize[0] ];
     for(y = 0; y < avctx->height ; y++ ) {
         uint8_t *row = &s->frame.data[0][ y*s->frame.linesize[0] ];
-        if (avctx->codec_tag == MKTAG('I','L','B','M')) { //interleaved
-            memset(row, 0, avctx->pix_fmt == PIX_FMT_PAL8 ? avctx->width : (avctx->width * 4));
+            memset(row, 0, avctx->width);
             for (plane = 0; plane < avctx->bits_per_coded_sample; plane++) {
                 for(x = 0; x < s->planesize && buf < buf_end; ) {
                     int8_t value = *buf++;
             for (plane = 0; plane < avctx->bits_per_coded_sample; plane++) {
                 for(x = 0; x < s->planesize && buf < buf_end; ) {
                     int8_t value = *buf++;
@@ -193,13 +201,36 @@ static int decode_frame_byterun1(AVCodecContext *avctx,
                     }
                     x += length;
                 }
                     }
                     x += length;
                 }
-                if (avctx->pix_fmt == PIX_FMT_PAL8) {
                     decodeplane8(row, s->planebuf, s->planesize, avctx->bits_per_coded_sample, plane);
                     decodeplane8(row, s->planebuf, s->planesize, avctx->bits_per_coded_sample, plane);
+                }
+            }
                 } else { //PIX_FMT_BGR32
                 } else { //PIX_FMT_BGR32
+            for(y = 0; y < avctx->height ; y++ ) {
+                uint8_t *row = &s->frame.data[0][y*s->frame.linesize[0]];
+                memset(row, 0, avctx->width << 2);
+                for (plane = 0; plane < avctx->bits_per_coded_sample; plane++) {
+                    for(x = 0; x < s->planesize && buf < buf_end; ) {
+                        int8_t value = *buf++;
+                        unsigned length;
+                        if (value >= 0) {
+                            length = value + 1;
+                            memcpy(s->planebuf + x, buf, FFMIN3(length, s->planesize - x, buf_end - buf));
+                            buf += length;
+                        } else if (value > -128) {
+                            length = -value + 1;
+                            memset(s->planebuf + x, *buf++, FFMIN(length, s->planesize - x));
+                        } else { // noop
+                            continue;
+                        }
+                        x += length;
+                    }
                     decodeplane32((uint32_t *) row, s->planebuf, s->planesize, avctx->bits_per_coded_sample, plane);
                 }
             }
                     decodeplane32((uint32_t *) row, s->planebuf, s->planesize, avctx->bits_per_coded_sample, plane);
                 }
             }
+        }
         } else {
         } else {
+            for(y = 0; y < avctx->height ; y++ ) {
+                uint8_t *row = &s->frame.data[0][y*s->frame.linesize[0]];
             for(x = 0; x < avctx->width && buf < buf_end; ) {
                 int8_t value = *buf++;
                 unsigned length;
             for(x = 0; x < avctx->width && buf < buf_end; ) {
                 int8_t value = *buf++;
                 unsigned length;