Merge "Switch the order of calculating 2-D inverse transform" into experimental

author: Yunqing Wang <yunqingwang@google.com> 2013-01-02 11:45:27 -0800
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-01-02 11:45:27 -0800
commit: 37166d5c1e4d3959cf26b8981bc6a545e511c92e (patch)
tree: 27a102d2003413fbc800fab8be250c51847a482a /vp9
parent: e9c69ab10222c98ec11d94f25c3579f296244fb3 (diff)
parent: cc80247f16ce83271e5c2043307dc65c8bb4bbf7 (diff)
download: libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.tar
libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.tar.gz
libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.tar.bz2
libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.zip
1 files changed, 25 insertions, 19 deletions
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index 7ce8cbee1..6cbc25967 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -399,10 +399,10 @@ void vp9_ihtllm_float_c(const int16_t *input, int16_t *output, int pitch,
 }
 
 /* Converted the transforms to integer form. */
-#define VERTICAL_SHIFT 14  // 16
-#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
-#define HORIZONTAL_SHIFT 17  // 15
+#define HORIZONTAL_SHIFT 14  // 16
 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
+#define VERTICAL_SHIFT 17  // 15
+#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
 void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
                       TX_TYPE tx_type, int tx_dim) {
   int i, j, k;
@@ -444,41 +444,47 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
       break;
   }
 
-  /* vertical transformation */
+  /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps
+   * from right to left:
+   * 1. horizontal transform: Y = Z*Transposed_M2
+   * 2. vertical transform: X = M1*Y
+   * In SIMD, this order avoids the transpose that would be needed if the
+   * product were calculated from left to right.
+   */
+  /* Horizontal transformation */
   for (j = 0; j < tx_dim; j++) {
     for (i = 0; i < tx_dim; i++) {
       int temp = 0;
 
       for (k = 0; k < tx_dim; k++) {
-        temp += ptv[k] * ip[(k * tx_dim)];
+        temp += ip[k] * pth[k];
       }
 
-      im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
-      ip++;
+      /* Calculate im and store it in its transposed position. */
+      im[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
+      ip += tx_dim;
     }
-    im += tx_dim;  // 16
-    ptv += tx_dim;
+    im += tx_dim;
+    pth += tx_dim;
     ip = input;
   }
 
-  /* horizontal transformation */
+  /* Vertical transformation */
   im = &imbuf[0];
 
-  for (j = 0; j < tx_dim; j++) {
-    const int16_t *pthc = pth;
-
-    for (i = 0; i < tx_dim; i++) {
+  for (i = 0; i < tx_dim; i++) {
+    for (j = 0; j < tx_dim; j++) {
       int temp = 0;
 
       for (k = 0; k < tx_dim; k++) {
-        temp += im[k] * pthc[k];
+        temp += ptv[k] * im[k];
       }
 
-      op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
-      pthc += tx_dim;
+      op[j] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
+      im += tx_dim;
     }
-
-    im += tx_dim;  // 16
+    im = &imbuf[0];
+    ptv += tx_dim;
     op += shortpitch;
   }
 }
author	Yunqing Wang <yunqingwang@google.com>	2013-01-02 11:45:27 -0800
committer	Gerrit Code Review <gerrit@gerrit.golo.chromium.org>	2013-01-02 11:45:27 -0800
commit	37166d5c1e4d3959cf26b8981bc6a545e511c92e (patch)
tree	27a102d2003413fbc800fab8be250c51847a482a /vp9
parent	e9c69ab10222c98ec11d94f25c3579f296244fb3 (diff)
parent	cc80247f16ce83271e5c2043307dc65c8bb4bbf7 (diff)
download	libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.tar libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.tar.gz libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.tar.bz2 libvpx-37166d5c1e4d3959cf26b8981bc6a545e511c92e.zip