13% faster decode_residual (cavlc).
author diane_cartman@gmx.de <diane_cartman@gmx.de>
Tue, 27 Sep 2005 04:46:45 +0000 (04:46 +0000)
committer Loren Merritt <lorenm@u.washington.edu>
Tue, 27 Sep 2005 04:46:45 +0000 (04:46 +0000)
patch by diane_cartman at gmx dot de.

Originally committed as revision 4617 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/h264.c

index 14362d3..68bc3e8 100644 (file)
@@ -4437,8 +4437,8 @@ static inline int get_dct8x8_allowed(H264Context *h){
 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
     MpegEncContext * const s = &h->s;
     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
-    int level[16], run[16];
-    int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
+    int level[16];
+    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
 
     //FIXME put trailing_onex into the context
 
@@ -4471,12 +4471,12 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
         level[i]= 1 - 2*get_bits1(gb);
     }
 
-    suffix_length= total_coeff > 10 && trailing_ones < 3;
-
-    for(; i<total_coeff; i++){
-        const int prefix= get_level_prefix(gb);
+    if(i<total_coeff) {
         int level_code, mask;
+        int suffix_length = total_coeff > 10 && trailing_ones < 3;
+        int prefix= get_level_prefix(gb);
 
+        //first coefficient has suffix_length equal to 0 or 1
         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
             if(suffix_length)
                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
@@ -4495,20 +4495,32 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
             return -1;
         }
 
-        if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
+        if(trailing_ones < 3) level_code += 2;
 
+        suffix_length = 1;
+        if(level_code > 5)
+            suffix_length++;
         mask= -(level_code&1);
         level[i]= (((2+level_code)>>1) ^ mask) - mask;
-
-        if(suffix_length==0) suffix_length=1; //FIXME split first iteration
-
-#if 1
-        if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
-#else        
-        if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
-        /* ? == prefix > 2 or sth */
-#endif
-        tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
+        i++;
+
+        //remaining coefficients have suffix_length > 0
+        for(;i<total_coeff;i++) {
+            static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
+            prefix = get_level_prefix(gb);
+            if(prefix<15){
+                level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
+            }else if(prefix==15){
+                level_code =  (prefix<<suffix_length) + get_bits(gb, 12);
+            }else{
+                av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+            mask= -(level_code&1);
+            level[i]= (((2+level_code)>>1) ^ mask) - mask;
+            if(level_code > suffix_limit[suffix_length])
+                suffix_length++;
+        }
     }
 
     if(total_coeff == max_coeff)
@@ -4519,50 +4531,49 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
         else
             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
     }
-    
-    for(i=0; i<total_coeff-1; i++){
-        if(zeros_left <=0)
-            break;
-        else if(zeros_left < 7){
-            run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
-        }else{
-            run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
-        }
-        zeros_left -= run[i];
-    }
-
-    if(zeros_left<0){
-        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
-        return -1;
-    }
-    
-    for(; i<total_coeff-1; i++){
-        run[i]= 0;
-    }
-
-    run[i]= zeros_left;
 
-    coeff_num=-1;
+    coeff_num = zeros_left + total_coeff - 1;
+    j = scantable[coeff_num];
     if(n > 24){
-        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
-            int j;
-
-            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
+        block[j] = level[0];
+        for(i=1;i<total_coeff;i++) {
+            if(zeros_left <= 0)
+                run_before = 0;
+            else if(zeros_left < 7){
+                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
+            }else{
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
+            }
+            zeros_left -= run_before;
+            coeff_num -= 1 + run_before;
             j= scantable[ coeff_num ];
 
             block[j]= level[i];
         }
     }else{
-        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into  rundecode?
-            int j;
-
-            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
+        block[j] = level[0] * qmul[j];
+        for(i=1;i<total_coeff;i++) {
+            if(zeros_left <= 0)
+                run_before = 0;
+            else if(zeros_left < 7){
+                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
+            }else{
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
+            }
+            zeros_left -= run_before;
+            coeff_num -= 1 + run_before;
             j= scantable[ coeff_num ];
 
             block[j]= level[i] * qmul[j];
 //            printf("%d %d  ", block[j], qmul[j]);
         }
     }
+
+    if(zeros_left<0){
+        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
+        return -1;
+    }
+
     return 0;
 }