Merge "Tune IDCT8_1D macro function interface"

author: Jingning Han <jingning@google.com> 2014-01-06 09:38:19 -0800
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2014-01-06 09:38:19 -0800
commit: b49e9fb433620dff5b3f7045901d2c874cd647da (patch)
tree: dc2d8421c6fb139e0da1beada467a155203c02e9 /vp9
parent: bc27812c8b90ce77f46bf89402704d59faffcf62 (diff)
parent: 3e0c62b53fec118ea32518983be3fd633481dab7 (diff)
download: libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar
libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar.gz
libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar.bz2
libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.zip
1 file changed, 18 insertions, 21 deletions
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 06df85054..501bed5a8 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -459,7 +459,9 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
       res1 = _mm_packs_epi32(tmp2, tmp3); \
   }
 
-#define IDCT8_1D  \
+#define IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+                 out0, out1, out2, out3, out4, out5, out6, out7)  \
+  { \
   /* Stage1 */      \
   { \
     const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \
@@ -519,14 +521,15 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
   } \
   \
   /* Stage4  */ \
-  in0 = _mm_adds_epi16(stp1_0, stp2_7); \
-  in1 = _mm_adds_epi16(stp1_1, stp1_6); \
-  in2 = _mm_adds_epi16(stp1_2, stp1_5); \
-  in3 = _mm_adds_epi16(stp1_3, stp2_4); \
-  in4 = _mm_subs_epi16(stp1_3, stp2_4); \
-  in5 = _mm_subs_epi16(stp1_2, stp1_5); \
-  in6 = _mm_subs_epi16(stp1_1, stp1_6); \
-  in7 = _mm_subs_epi16(stp1_0, stp2_7);
+  out0 = _mm_adds_epi16(stp1_0, stp2_7); \
+  out1 = _mm_adds_epi16(stp1_1, stp1_6); \
+  out2 = _mm_adds_epi16(stp1_2, stp1_5); \
+  out3 = _mm_adds_epi16(stp1_3, stp2_4); \
+  out4 = _mm_subs_epi16(stp1_3, stp2_4); \
+  out5 = _mm_subs_epi16(stp1_2, stp1_5); \
+  out6 = _mm_subs_epi16(stp1_1, stp1_6); \
+  out7 = _mm_subs_epi16(stp1_0, stp2_7); \
+  }
 
 #define RECON_AND_STORE(dest, in_x) \
   {                                                     \
@@ -574,7 +577,8 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
                   in0, in1, in2, in3, in4, in5, in6, in7);
 
     // 4-stage 1D idct8x8
-    IDCT8_1D
+    IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+             in0, in1, in2, in3, in4, in5, in6, in7);
   }
 
   // Final rounding and shift
@@ -697,15 +701,8 @@ static void idct8_1d_sse2(__m128i *in) {
                 in0, in1, in2, in3, in4, in5, in6, in7);
 
   // 4-stage 1D idct8x8
-  IDCT8_1D
-  in[0] = in0;
-  in[1] = in1;
-  in[2] = in2;
-  in[3] = in3;
-  in[4] = in4;
-  in[5] = in5;
-  in[6] = in6;
-  in[7] = in7;
+  IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+           in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]);
 }
 
 static void iadst8_1d_sse2(__m128i *in) {
@@ -1112,9 +1109,9 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   tmp3 = _mm_subs_epi16(stp1_2, stp1_5);
 
   TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
-  in4 = in5 = in6 = in7 = zero;
 
-  IDCT8_1D
+  IDCT8_1D(in0, in1, in2, in3, zero, zero, zero, zero,
+           in0, in1, in2, in3, in4, in5, in6, in7);
   // Final rounding and shift
   in0 = _mm_adds_epi16(in0, final_rounding);
   in1 = _mm_adds_epi16(in1, final_rounding);
author	Jingning Han <jingning@google.com>	2014-01-06 09:38:19 -0800
committer	Gerrit Code Review <gerrit@gerrit.golo.chromium.org>	2014-01-06 09:38:19 -0800
commit	b49e9fb433620dff5b3f7045901d2c874cd647da (patch)
tree	dc2d8421c6fb139e0da1beada467a155203c02e9 /vp9
parent	bc27812c8b90ce77f46bf89402704d59faffcf62 (diff)
parent	3e0c62b53fec118ea32518983be3fd633481dab7 (diff)
download	libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar.gz libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar.bz2 libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.zip