Changed to use integer 8x8 dct

The commit added an integer version of the 8x8 forward DCT, based on the original forward DCT from VP6. The constants, roundings, and shifts were adjusted to improve the accuracy. The latest patch has very similar accuracy, in terms of round-trip error, to the floating-point version. It should be noted here that the purpose of the patch is to help encoding speed and facilitate all other experiments. There will be further review in combination with the inverse DCT before finalization. Configure with "--enable-int_8x8fdct" to use the integer version. Change-Id: I5a4f80507429f0e07cf02a13768ec81cbfddc5bc
author: Yaowu Xu <yaowu@google.com> 2012-05-09 09:31:14 -0700
committer: Yaowu Xu <yaowu@google.com> 2012-05-15 07:28:26 -0700
commit: b22cc559b6f3d17a5b230f239066158328bbfb9a (patch)
tree: 983efa09f345f11959a437d43965df8b965ae31f
parent: 3536ad5bb9a630e58bd6503423023b16b40b201e (diff)
download: libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.tar
libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.tar.gz
libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.tar.bz2
libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.zip
3 files changed, 254 insertions, 8 deletions
diff --git a/configure b/configure
index 67171744b..52ff9c898 100755
--- a/configure
+++ b/configure
@@ -228,6 +228,7 @@ EXPERIMENT_LIST="
     newupdate
     superblocks
     expanded_coef_context
+    int_8x8fdct
 "
 CONFIG_LIST="
     external_build
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index e7505619a..6f9c68ef7 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -12,6 +12,249 @@
 #include <math.h>
 #include "vpx_ports/config.h"
 
+#if CONFIG_INT_8X8FDCT
+
+static const int xC1S7 = 16069;  // ~cos(1*pi/16) * (1 << SHIFT_BITS)
+static const int xC2S6 = 15137;  // ~cos(2*pi/16) * (1 << SHIFT_BITS)
+static const int xC3S5 = 13623;  // ~cos(3*pi/16) * (1 << SHIFT_BITS)
+static const int xC4S4 = 11585;  // ~cos(4*pi/16) * (1 << SHIFT_BITS)
+static const int xC5S3 =  9102;  // ~cos(5*pi/16) * (1 << SHIFT_BITS)
+static const int xC6S2 =  6270;  // ~cos(6*pi/16) * (1 << SHIFT_BITS)
+static const int xC7S1 =  3196;  // ~cos(7*pi/16) * (1 << SHIFT_BITS)
+// SHIFT_BITS: number of fractional bits carried by the xCnSm constants above.
+#define SHIFT_BITS 14
+#define DOROUND(X) X += (1<<(SHIFT_BITS-1));  // adds half of (1 << SHIFT_BITS) so the following >> rounds; the macro supplies its own ';'
+// FINAL_SHIFT / FINAL_ROUNDING: rounded right shift applied to the column-pass outputs.
+#define FINAL_SHIFT 3
+#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
+#define IN_SHIFT (FINAL_SHIFT+1)  // left shift applied to the row-pass inputs; one bit more than FINAL_SHIFT
+
+
+void vp8_short_fdct8x8_c ( short * InputData, short * OutputData, int pitch)
+{   // Integer 8x8 forward DCT: a row pass into InterData, then a column pass into OutputData.
+    int loop;
+    int short_pitch = pitch>>1;    // input row stride in shorts; pitch is presumably in bytes -- TODO confirm against callers
+    int is07, is12, is34, is56;    // sums of input pairs (0,7) (1,2) (3,4) (5,6)
+    int is0734, is1256;            // sums of the pair sums
+    int id07, id12, id34, id56;    // differences of the same input pairs
+    int irot_input_x, irot_input_y; // the two inputs fed to each rotation below
+    int icommon_product1;      // Re-used product  (c4s4 * (s12 - s56))
+    int icommon_product2;      // Re-used product  (c4s4 * (d12 + d56))
+    int temp1, temp2;          // rounded, down-shifted products of one rotation
+    // InterData keeps the row-pass results as ints; OutputData receives 64 shorts with a row stride of 8.
+    int  InterData[64];
+    int  *ip = InterData;
+    short *op = OutputData;
+    // Pass 1: transform each of the 8 input rows into one row of InterData.
+    for (loop = 0; loop < 8; loop++)
+    {
+        // Sums and differences of input pairs, pre-scaled by IN_SHIFT bits.
+        is07 = (InputData[0] + InputData[7])<<IN_SHIFT;
+        is12 = (InputData[1] + InputData[2])<<IN_SHIFT;
+        is34 = (InputData[3] + InputData[4])<<IN_SHIFT;
+        is56 = (InputData[5] + InputData[6])<<IN_SHIFT;
+        id07 = (InputData[0] - InputData[7])<<IN_SHIFT;
+        id12 = (InputData[1] - InputData[2])<<IN_SHIFT;
+        id34 = (InputData[3] - InputData[4])<<IN_SHIFT;
+        id56 = (InputData[5] - InputData[6])<<IN_SHIFT;
+        // NOTE(review): << is applied to values that can be negative, which ISO C leaves undefined -- confirm target compilers.
+        is0734 = is07 + is34;
+        is1256 = is12 + is56;
+
+        // Pre-calculate the xC4S4 products shared by outputs 1, 3, 5 and 7.
+        icommon_product1 = xC4S4*(is12 - is56);
+        DOROUND(icommon_product1)
+        icommon_product1>>=SHIFT_BITS;
+
+        icommon_product2 = xC4S4*(id12 + id56);
+        DOROUND(icommon_product2)
+        icommon_product2>>=SHIFT_BITS;
+        // Outputs 0 and 4: xC4S4 times the sum / difference of is0734 and is1256.
+        // NOTE(review): all products are computed in int; the safe input range is not visible here -- verify.
+        ip[0] = (xC4S4*(is0734 + is1256));
+        DOROUND(ip[0]);
+        ip[0] >>= SHIFT_BITS;
+
+        ip[4] = (xC4S4*(is0734 - is1256));
+        DOROUND(ip[4]);
+        ip[4] >>= SHIFT_BITS;
+
+        // Define inputs to rotation for outputs 2 and 6
+        irot_input_x = id12 - id56;
+        irot_input_y = is07 - is34;
+
+        // Apply rotation for outputs 2 and 6 (each product is rounded and shifted separately).
+        temp1=xC6S2*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC2S6*irot_input_y;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        ip[2] = temp1 + temp2;
+
+        temp1=xC6S2*irot_input_y;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC2S6*irot_input_x ;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        ip[6] = temp1 -temp2 ;
+
+        // Define inputs to rotation for outputs 1 and 7
+        irot_input_x = icommon_product1 + id07;
+        irot_input_y = -( id34 + icommon_product2 );
+
+        // Apply rotation for outputs 1 and 7.
+        temp1=xC1S7*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC7S1*irot_input_y;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        ip[1] = temp1 - temp2;
+
+        temp1=xC7S1*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC1S7*irot_input_y ;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        ip[7] = temp1 + temp2 ;
+
+        // Define inputs to rotation for outputs 3 and 5
+        irot_input_x = id07 - icommon_product1;
+        irot_input_y = id34 - icommon_product2;
+
+        // Apply rotation for outputs 3 and 5.
+        temp1=xC3S5*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC5S3*irot_input_y ;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        ip[3] = temp1 - temp2 ;
+
+
+        temp1=xC5S3*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC3S5*irot_input_y;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        ip[5] = temp1 + temp2;
+
+        // Advance to the next input row and the next row of InterData.
+        InputData += short_pitch ;
+        ip += 8;
+    }
+
+    // Pass 2: rows are done; transform each column of InterData (stride 8) into OutputData.
+    ip = InterData;
+    for (loop = 0; loop < 8; loop++)
+    {
+        // Sums and differences of column entries; no input scaling in this pass.
+        is07 = ip[0 * 8] + ip[7 * 8];
+        is12 = ip[1 * 8] + ip[2 * 8];
+        is34 = ip[3 * 8] + ip[4 * 8];
+        is56 = ip[5 * 8] + ip[6 * 8];
+
+        id07 = ip[0 * 8] - ip[7 * 8];
+        id12 = ip[1 * 8] - ip[2 * 8];
+        id34 = ip[3 * 8] - ip[4 * 8];
+        id56 = ip[5 * 8] - ip[6 * 8];
+
+        is0734 = is07 + is34;
+        is1256 = is12 + is56;
+
+        // Pre-calculate the xC4S4 products shared by outputs 1, 3, 5 and 7.
+        icommon_product1 = xC4S4*(is12 - is56) ;
+        icommon_product2 = xC4S4*(id12 + id56) ;
+        DOROUND(icommon_product1)
+        DOROUND(icommon_product2)
+        icommon_product1>>=SHIFT_BITS;
+        icommon_product2>>=SHIFT_BITS;
+
+        // Outputs 0 and 4; every output of this pass is rounded with FINAL_ROUNDING and shifted by FINAL_SHIFT.
+        temp1 = xC4S4*(is0734 + is1256) ;
+        temp2 = xC4S4*(is0734 - is1256) ;
+        DOROUND(temp1);
+        DOROUND(temp2);
+        temp1>>=SHIFT_BITS;
+
+        temp2>>=SHIFT_BITS;
+        op[0*8] = (temp1 + FINAL_ROUNDING)>>FINAL_SHIFT;
+        op[4*8] = (temp2 + FINAL_ROUNDING)>>FINAL_SHIFT;
+
+        // Define inputs to rotation for outputs 2 and 6
+        irot_input_x = id12 - id56;
+        irot_input_y = is07 - is34;
+
+        // Apply rotation for outputs 2 and 6.
+        temp1=xC6S2*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC2S6*irot_input_y;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        op[2*8] = (temp1 + temp2 + FINAL_ROUNDING)>>FINAL_SHIFT;
+
+        temp1=xC6S2*irot_input_y;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC2S6*irot_input_x ;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        op[6*8] = (temp1 -temp2 + FINAL_ROUNDING)>>FINAL_SHIFT ;
+
+        // Define inputs to rotation for outputs 1 and 7
+        irot_input_x = icommon_product1 + id07;
+        irot_input_y = -( id34 + icommon_product2 );
+
+        // Apply rotation for outputs 1 and 7.
+        temp1=xC1S7*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC7S1*irot_input_y;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        op[1*8] = (temp1 - temp2 + FINAL_ROUNDING)>>FINAL_SHIFT;
+
+        temp1=xC7S1*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC1S7*irot_input_y ;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        op[7*8] = (temp1 + temp2 + FINAL_ROUNDING)>>FINAL_SHIFT;
+
+        // Define inputs to rotation for outputs 3 and 5
+        irot_input_x = id07 - icommon_product1;
+        irot_input_y = id34 - icommon_product2;
+
+        // Apply rotation for outputs 3 and 5.
+        temp1=xC3S5*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC5S3*irot_input_y ;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        op[3*8] =  (temp1 - temp2 + FINAL_ROUNDING)>>FINAL_SHIFT ;
+
+
+        temp1=xC5S3*irot_input_x;
+        DOROUND(temp1);
+        temp1>>=SHIFT_BITS;
+        temp2=xC3S5*irot_input_y;
+        DOROUND(temp2);
+        temp2>>=SHIFT_BITS;
+        op[5*8] =  (temp1 + temp2 + FINAL_ROUNDING)>>FINAL_SHIFT;
+
+        // Advance both pointers to the next column.
+        ip ++;
+        op ++;
+    }
+}
+#else
 
 void vp8_short_fdct8x8_c(short *block, short *coefs, int pitch)
 {
@@ -102,7 +345,7 @@ void vp8_short_fdct8x8_c(short *block, short *coefs, int pitch)
   return;
 }
 
-
+#endif
 
 void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) //pitch = 8
 {
@@ -214,4 +457,4 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
         ip += 4;
         op += 4;
     }
-}
+}
+\ No newline at end of file
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index ae8ab8846..81c69d9f8 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -239,7 +239,9 @@ void vp8_transform_mbuv_8x8(MACROBLOCK *x)
 {
     int i;
 
+#if !CONFIG_INT_8X8FDCT
     vp8_clear_system_state();
+#endif
 
     for (i = 16; i < 24; i += 4)
     {
@@ -252,9 +254,9 @@ void vp8_transform_mbuv_8x8(MACROBLOCK *x)
 void vp8_transform_intra_mby_8x8(MACROBLOCK *x)//changed
 {
     int i;
-
+#if !CONFIG_INT_8X8FDCT
     vp8_clear_system_state();
-
+#endif
     for (i = 0; i < 9; i += 8)
     {
         x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
@@ -279,9 +281,9 @@ void vp8_transform_intra_mby_8x8(MACROBLOCK *x)//changed
 void vp8_transform_mb_8x8(MACROBLOCK *x)
 {
     int i;
-
+#if !CONFIG_INT_8X8FDCT
     vp8_clear_system_state();
-
+#endif
     for (i = 0; i < 9; i += 8)
     {
         x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
@@ -312,9 +314,9 @@ void vp8_transform_mb_8x8(MACROBLOCK *x)
 void vp8_transform_mby_8x8(MACROBLOCK *x)
 {
     int i;
-
+#if !CONFIG_INT_8X8FDCT
     vp8_clear_system_state();
-
+#endif
     for (i = 0; i < 9; i += 8)
     {
         x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
author	Yaowu Xu <yaowu@google.com>	2012-05-09 09:31:14 -0700
committer	Yaowu Xu <yaowu@google.com>	2012-05-15 07:28:26 -0700
commit	b22cc559b6f3d17a5b230f239066158328bbfb9a (patch)
tree	983efa09f345f11959a437d43965df8b965ae31f
parent	3536ad5bb9a630e58bd6503423023b16b40b201e (diff)
download	libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.tar libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.tar.gz libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.tar.bz2 libvpx-b22cc559b6f3d17a5b230f239066158328bbfb9a.zip