optimize 8x8 fdct rounding for accuracy

This commit changes the final rounding step of the 8x8 forward DCT (the last `>> 1` becomes `/ 2`, so negative values are rounded toward zero rather than shifted) to get rid of a sign bias at the DC position and improve the accuracy in terms of round-trip error for the 8x8 fDCT/iDCT. This commit also enables the forward 8x8 DCT test. Change-Id: Ib67f99b0a24d513e230c7812bc04569d472fdc50
author: Yaowu Xu <yaowu@google.com> 2013-02-22 11:14:04 -0800
committer: Yaowu Xu <yaowu@google.com> 2013-02-22 16:55:30 -0800
commit: 22012ee99416dae8640e1b72009ea9aeaa143850 (patch)
tree: 077a1be6e02130b253a5bfc82b88e280fec05311 /vp9
parent: 4e2697f5cdc661f71ce2a2b947a63cd4c25712c3 (diff)
download: libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.tar
libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.tar.gz
libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.tar.bz2
libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.zip
1 files changed, 1 insertions, 244 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 4a1e78e93..a459e949b 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -323,247 +323,6 @@ static const int16_t adst_i16[256] = {
 };
 #endif
 
-#define NEW_FDCT8x8 1
-#if !NEW_FDCT8x8
-static const int xC1S7 = 16069;
-static const int xC2S6 = 15137;
-static const int xC3S5 = 13623;
-static const int xC4S4 = 11585;
-static const int xC5S3 =  9102;
-static const int xC6S2 =  6270;
-static const int xC7S1 =  3196;
-
-#define SHIFT_BITS 14
-#define DOROUND(X) X += (1<<(SHIFT_BITS-1));
-
-#define FINAL_SHIFT 3
-#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
-#define IN_SHIFT (FINAL_SHIFT+1)
-
-
-void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
-  int loop;
-  int short_pitch = pitch >> 1;
-  int is07, is12, is34, is56;
-  int is0734, is1256;
-  int id07, id12, id34, id56;
-  int irot_input_x, irot_input_y;
-  int icommon_product1;      // Re-used product  (c4s4 * (s12 - s56))
-  int icommon_product2;      // Re-used product  (c4s4 * (d12 + d56))
-  int temp1, temp2;          // intermediate variable for computation
-
-  int  InterData[64];
-  int  *ip = InterData;
-  short *op = OutputData;
-
-  for (loop = 0; loop < 8; loop++) {
-    // Pre calculate some common sums and differences.
-    is07 = (InputData[0] + InputData[7]) << IN_SHIFT;
-    is12 = (InputData[1] + InputData[2]) << IN_SHIFT;
-    is34 = (InputData[3] + InputData[4]) << IN_SHIFT;
-    is56 = (InputData[5] + InputData[6]) << IN_SHIFT;
-    id07 = (InputData[0] - InputData[7]) << IN_SHIFT;
-    id12 = (InputData[1] - InputData[2]) << IN_SHIFT;
-    id34 = (InputData[3] - InputData[4]) << IN_SHIFT;
-    id56 = (InputData[5] - InputData[6]) << IN_SHIFT;
-
-    is0734 = is07 + is34;
-    is1256 = is12 + is56;
-
-    // Pre-Calculate some common product terms.
-    icommon_product1 = xC4S4 * (is12 - is56);
-    DOROUND(icommon_product1)
-    icommon_product1 >>= SHIFT_BITS;
-
-    icommon_product2 = xC4S4 * (id12 + id56);
-    DOROUND(icommon_product2)
-    icommon_product2 >>= SHIFT_BITS;
-
-
-    ip[0] = (xC4S4 * (is0734 + is1256));
-    DOROUND(ip[0]);
-    ip[0] >>= SHIFT_BITS;
-
-    ip[4] = (xC4S4 * (is0734 - is1256));
-    DOROUND(ip[4]);
-    ip[4] >>= SHIFT_BITS;
-
-    // Define inputs to rotation for outputs 2 and 6
-    irot_input_x = id12 - id56;
-    irot_input_y = is07 - is34;
-
-    // Apply rotation for outputs 2 and 6.
-    temp1 = xC6S2 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[2] = temp1 + temp2;
-
-    temp1 = xC6S2 * irot_input_y;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_x;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[6] = temp1 - temp2;
-
-    // Define inputs to rotation for outputs 1 and 7
-    irot_input_x = icommon_product1 + id07;
-    irot_input_y = -(id34 + icommon_product2);
-
-    // Apply rotation for outputs 1 and 7.
-    temp1 = xC1S7 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC7S1 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[1] = temp1 - temp2;
-
-    temp1 = xC7S1 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC1S7 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[7] = temp1 + temp2;
-
-    // Define inputs to rotation for outputs 3 and 5
-    irot_input_x = id07 - icommon_product1;
-    irot_input_y = id34 - icommon_product2;
-
-    // Apply rotation for outputs 3 and 5.
-    temp1 = xC3S5 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC5S3 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[3] = temp1 - temp2;
-
-
-    temp1 = xC5S3 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC3S5 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[5] = temp1 + temp2;
-
-    // Increment data pointer for next row
-    InputData += short_pitch;
-    ip += 8;
-  }
-
-  // Performed DCT on rows, now transform the columns
-  ip = InterData;
-  for (loop = 0; loop < 8; loop++) {
-    // Pre calculate some common sums and differences.
-    is07 = ip[0 * 8] + ip[7 * 8];
-    is12 = ip[1 * 8] + ip[2 * 8];
-    is34 = ip[3 * 8] + ip[4 * 8];
-    is56 = ip[5 * 8] + ip[6 * 8];
-
-    id07 = ip[0 * 8] - ip[7 * 8];
-    id12 = ip[1 * 8] - ip[2 * 8];
-    id34 = ip[3 * 8] - ip[4 * 8];
-    id56 = ip[5 * 8] - ip[6 * 8];
-
-    is0734 = is07 + is34;
-    is1256 = is12 + is56;
-
-    // Pre-Calculate some common product terms
-    icommon_product1 = xC4S4 * (is12 - is56);
-    icommon_product2 = xC4S4 * (id12 + id56);
-    DOROUND(icommon_product1)
-    DOROUND(icommon_product2)
-    icommon_product1 >>= SHIFT_BITS;
-    icommon_product2 >>= SHIFT_BITS;
-
-
-    temp1 = xC4S4 * (is0734 + is1256);
-    temp2 = xC4S4 * (is0734 - is1256);
-    DOROUND(temp1);
-    DOROUND(temp2);
-    temp1 >>= SHIFT_BITS;
-
-    temp2 >>= SHIFT_BITS;
-    op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT;
-    op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Define inputs to rotation for outputs 2 and 6
-    irot_input_x = id12 - id56;
-    irot_input_y = is07 - is34;
-
-    // Apply rotation for outputs 2 and 6.
-    temp1 = xC6S2 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    temp1 = xC6S2 * irot_input_y;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_x;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Define inputs to rotation for outputs 1 and 7
-    irot_input_x = icommon_product1 + id07;
-    irot_input_y = -(id34 + icommon_product2);
-
-    // Apply rotation for outputs 1 and 7.
-    temp1 = xC1S7 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC7S1 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    temp1 = xC7S1 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC1S7 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Define inputs to rotation for outputs 3 and 5
-    irot_input_x = id07 - icommon_product1;
-    irot_input_y = id34 - icommon_product2;
-
-    // Apply rotation for outputs 3 and 5.
-    temp1 = xC3S5 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC5S3 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-
-    temp1 = xC5S3 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC3S5 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Increment data pointer for next column.
-    ip++;
-    op++;
-  }
-}
-#endif
-
 /* For test */
 #define TEST_INT 1
 #if TEST_INT
@@ -918,7 +677,6 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
     vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
 }
 
-#if NEW_FDCT8x8
 static void fdct8_1d(int16_t *input, int16_t *output) {
   int16_t step[8];
   int temp1, temp2;
@@ -986,10 +744,9 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) {
       temp_in[j] = out[j + i * 8];
     fdct8_1d(temp_in, temp_out);
     for (j = 0; j < 8; ++j)
-      output[j + i * 8] = temp_out[j] >> 1;
+      output[j + i * 8] = temp_out[j] / 2;
   }
 }
-#endif
 
 #if CONFIG_INTHT
 static void fadst8_1d(int16_t *input, int16_t *output) {
author	Yaowu Xu <yaowu@google.com>	2013-02-22 11:14:04 -0800
committer	Yaowu Xu <yaowu@google.com>	2013-02-22 16:55:30 -0800
commit	22012ee99416dae8640e1b72009ea9aeaa143850 (patch)
tree	077a1be6e02130b253a5bfc82b88e280fec05311 /vp9
parent	4e2697f5cdc661f71ce2a2b947a63cd4c25712c3 (diff)
download	libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.tar libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.tar.gz libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.tar.bz2 libvpx-22012ee99416dae8640e1b72009ea9aeaa143850.zip