1/2 resolution decoding
[libav.git] / libavcodec / jrevdct.c
index 3bd78c1..1c253fe 100644 (file)
@@ -1172,5 +1172,461 @@ void j_rev_dct(DCTBLOCK data)
   }
 }
 
+#undef DCTSIZE
+#define DCTSIZE 4
+#define DCTSTRIDE 8
+
+void j_rev_dct4(DCTBLOCK data)
+{
+  int32_t tmp0, tmp1, tmp2, tmp3;
+  int32_t tmp10, tmp11, tmp12, tmp13;
+  int32_t z1;
+  int32_t d0, d2, d4, d6;
+  register DCTELEM *dataptr;
+  int rowctr;
+   
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    register int *idataptr = (int*)dataptr;
+
+    /* WARNING: we do the same permutation as MMX idct to simplify the
+       video core */
+    d0 = dataptr[0];
+    d2 = dataptr[1];
+    d4 = dataptr[2];
+    d6 = dataptr[3];
+
+    if ((d2 | d4 | d6) == 0) {
+      /* AC terms all zero */
+      if (d0) {
+         /* Compute a 32 bit value to assign. */
+         DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+         register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
+         
+         idataptr[0] = v;
+         idataptr[1] = v;
+      }
+      
+      dataptr += DCTSTRIDE;    /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+       if (d4) {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp0 = (d0 + d4) << CONST_BITS;
+                   tmp1 = (d0 - d4) << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp1 + tmp2;
+                   tmp12 = tmp1 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp0 = d4 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp2 - tmp0;
+                   tmp12 = -(tmp0 + tmp2);
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp0 = (d0 + d4) << CONST_BITS;
+                   tmp1 = (d0 - d4) << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp1 + tmp2;
+                   tmp12 = tmp1 - tmp2;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp0 = d4 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp2 - tmp0;
+                   tmp12 = -(tmp0 + tmp2);
+               }
+           }
+       } else {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp0 = d0 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp0 + tmp2;
+                   tmp12 = tmp0 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp10 = tmp3;
+                   tmp13 = -tmp3;
+                   tmp11 = tmp2;
+                   tmp12 = -tmp2;
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp0 = d0 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp0 + tmp2;
+                   tmp12 = tmp0 - tmp2;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp10 = tmp3;
+                   tmp13 = -tmp3;
+                   tmp11 = tmp2;
+                   tmp12 = -tmp2;
+               }
+           }
+       }
+    } else {
+       if (d4) {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp0 = (d0 + d4) << CONST_BITS;
+                   tmp1 = (d0 - d4) << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp1 + tmp2;
+                   tmp12 = tmp1 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp0 = d4 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp2 - tmp0;
+                   tmp12 = -(tmp0 + tmp2);
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                   tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                   tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
+                   tmp10 = tmp13 = d4 << CONST_BITS;
+                   tmp11 = tmp12 = -tmp10;
+               }
+           }
+       } else {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp0 = d0 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp0 + tmp2;
+                   tmp12 = tmp0 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp10 = tmp3;
+                   tmp13 = -tmp3;
+                   tmp11 = tmp2;
+                   tmp12 = -tmp2;
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
+                   tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
+                   tmp10 = tmp13 = tmp11 = tmp12 = 0;
+               }
+           }
+       }
+      }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSTRIDE;              /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Columns of zeroes can be exploited in the same way as we did with rows.
+     * However, the row calculation has created many nonzero AC terms, so the
+     * simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+    d0 = dataptr[DCTSTRIDE*0];
+    d2 = dataptr[DCTSTRIDE*1];
+    d4 = dataptr[DCTSTRIDE*2];
+    d6 = dataptr[DCTSTRIDE*3];
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+       if (d4) {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp0 = (d0 + d4) << CONST_BITS;
+                   tmp1 = (d0 - d4) << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp1 + tmp2;
+                   tmp12 = tmp1 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp0 = d4 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp2 - tmp0;
+                   tmp12 = -(tmp0 + tmp2);
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp0 = (d0 + d4) << CONST_BITS;
+                   tmp1 = (d0 - d4) << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp1 + tmp2;
+                   tmp12 = tmp1 - tmp2;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp0 = d4 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp2 - tmp0;
+                   tmp12 = -(tmp0 + tmp2);
+               }
+           }
+       } else {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp0 = d0 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp0 + tmp2;
+                   tmp12 = tmp0 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
+                   z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                   tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                   tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                   tmp10 = tmp3;
+                   tmp13 = -tmp3;
+                   tmp11 = tmp2;
+                   tmp12 = -tmp2;
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp0 = d0 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp0 + tmp2;
+                   tmp12 = tmp0 - tmp2;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
+                   tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                   tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                   tmp10 = tmp3;
+                   tmp13 = -tmp3;
+                   tmp11 = tmp2;
+                   tmp12 = -tmp2;
+               }
+           }
+       }
+    } else {
+       if (d4) {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp0 = (d0 + d4) << CONST_BITS;
+                   tmp1 = (d0 - d4) << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp1 + tmp2;
+                   tmp12 = tmp1 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp0 = d4 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp2 - tmp0;
+                   tmp12 = -(tmp0 + tmp2);
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                   tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                   tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
+                   tmp10 = tmp13 = d4 << CONST_BITS;
+                   tmp11 = tmp12 = -tmp10;
+               }
+           }
+       } else {
+           if (d2) {
+               if (d0) {
+                   /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp0 = d0 << CONST_BITS;
+
+                   tmp10 = tmp0 + tmp3;
+                   tmp13 = tmp0 - tmp3;
+                   tmp11 = tmp0 + tmp2;
+                   tmp12 = tmp0 - tmp2;
+               } else {
+                   /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
+                   tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                   tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                   tmp10 = tmp3;
+                   tmp13 = -tmp3;
+                   tmp11 = tmp2;
+                   tmp12 = -tmp2;
+               }
+           } else {
+               if (d0) {
+                   /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
+                   tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
+               } else {
+                   /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
+                   tmp10 = tmp13 = tmp11 = tmp12 = 0;
+               }
+           }
+       }
+    }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[DCTSTRIDE*0] = (DCTELEM) DESCALE(tmp10,
+                                          CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*1] = (DCTELEM) DESCALE(tmp11,
+                                          CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*2] = (DCTELEM) DESCALE(tmp12,
+                                          CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*3] = (DCTELEM) DESCALE(tmp13,
+                                          CONST_BITS+PASS1_BITS+3);
+    
+    dataptr++;                 /* advance pointer to next column */
+  }
+}
+
+
 #undef FIX
 #undef CONST_BITS