summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
author: Jingning Han <jingning@google.com> 2014-01-06 09:38:19 -0800
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2014-01-06 09:38:19 -0800
commit: b49e9fb433620dff5b3f7045901d2c874cd647da (patch)
tree: dc2d8421c6fb139e0da1beada467a155203c02e9 /vp9
parent: bc27812c8b90ce77f46bf89402704d59faffcf62 (diff)
parent: 3e0c62b53fec118ea32518983be3fd633481dab7 (diff)
download: libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar
libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar.gz
libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.tar.bz2
libvpx-b49e9fb433620dff5b3f7045901d2c874cd647da.zip
Merge "Tune IDCT8_1D macro function interface"
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/x86/vp9_idct_intrin_sse2.c | 39
1 files changed, 18 insertions, 21 deletions
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 06df85054..501bed5a8 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -459,7 +459,9 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
res1 = _mm_packs_epi32(tmp2, tmp3); \
}
-#define IDCT8_1D \
+#define IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) \
+ { \
/* Stage1 */ \
{ \
const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \
@@ -519,14 +521,15 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
} \
\
/* Stage4 */ \
- in0 = _mm_adds_epi16(stp1_0, stp2_7); \
- in1 = _mm_adds_epi16(stp1_1, stp1_6); \
- in2 = _mm_adds_epi16(stp1_2, stp1_5); \
- in3 = _mm_adds_epi16(stp1_3, stp2_4); \
- in4 = _mm_subs_epi16(stp1_3, stp2_4); \
- in5 = _mm_subs_epi16(stp1_2, stp1_5); \
- in6 = _mm_subs_epi16(stp1_1, stp1_6); \
- in7 = _mm_subs_epi16(stp1_0, stp2_7);
+ out0 = _mm_adds_epi16(stp1_0, stp2_7); \
+ out1 = _mm_adds_epi16(stp1_1, stp1_6); \
+ out2 = _mm_adds_epi16(stp1_2, stp1_5); \
+ out3 = _mm_adds_epi16(stp1_3, stp2_4); \
+ out4 = _mm_subs_epi16(stp1_3, stp2_4); \
+ out5 = _mm_subs_epi16(stp1_2, stp1_5); \
+ out6 = _mm_subs_epi16(stp1_1, stp1_6); \
+ out7 = _mm_subs_epi16(stp1_0, stp2_7); \
+ }
#define RECON_AND_STORE(dest, in_x) \
{ \
@@ -574,7 +577,8 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
- IDCT8_1D
+ IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
}
// Final rounding and shift
@@ -697,15 +701,8 @@ static void idct8_1d_sse2(__m128i *in) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
- IDCT8_1D
- in[0] = in0;
- in[1] = in1;
- in[2] = in2;
- in[3] = in3;
- in[4] = in4;
- in[5] = in5;
- in[6] = in6;
- in[7] = in7;
+ IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]);
}
static void iadst8_1d_sse2(__m128i *in) {
@@ -1112,9 +1109,9 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
tmp3 = _mm_subs_epi16(stp1_2, stp1_5);
TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
- in4 = in5 = in6 = in7 = zero;
- IDCT8_1D
+ IDCT8_1D(in0, in1, in2, in3, zero, zero, zero, zero,
+ in0, in1, in2, in3, in4, in5, in6, in7);
// Final rounding and shift
in0 = _mm_adds_epi16(in0, final_rounding);
in1 = _mm_adds_epi16(in1, final_rounding);