summaryrefslogtreecommitdiff
path: root/vpx_dsp/x86/fwd_txfm_sse2.c
diff options
context:
space:
mode:
author: James Zern <jzern@google.com> 2016-04-01 12:35:33 -0700
committer: James Zern <jzern@google.com> 2016-04-04 16:02:06 -0700
commit3735def66700a90ef09d0a36e285f18479a9511c (patch)
tree123b751c5824a3e8929e3d86749d8e04d0487467 /vpx_dsp/x86/fwd_txfm_sse2.c
parentc98f8e04e573bdab8dbd2e9ab23354af58b2af68 (diff)
downloadlibvpx-3735def66700a90ef09d0a36e285f18479a9511c.tar
libvpx-3735def66700a90ef09d0a36e285f18479a9511c.tar.gz
libvpx-3735def66700a90ef09d0a36e285f18479a9511c.tar.bz2
libvpx-3735def66700a90ef09d0a36e285f18479a9511c.zip
vpx_fdctNxN_1_sse2: reduce store size
Only output[0] needs to be set; store_output is more involved than a movdqa in the high bitdepth case.
Change-Id: I2cbd85d7cf74688bdf47eb767934fe42e02bff67
Diffstat (limited to 'vpx_dsp/x86/fwd_txfm_sse2.c')
-rw-r--r-- vpx_dsp/x86/fwd_txfm_sse2.c 8
1 files changed, 4 insertions, 4 deletions
diff --git a/vpx_dsp/x86/fwd_txfm_sse2.c b/vpx_dsp/x86/fwd_txfm_sse2.c
index bca72e874..62c18d3b4 100644
--- a/vpx_dsp/x86/fwd_txfm_sse2.c
+++ b/vpx_dsp/x86/fwd_txfm_sse2.c
@@ -40,7 +40,7 @@ void vpx_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
in1 = _mm_add_epi32(tmp, in0);
in0 = _mm_slli_epi32(in1, 1);
- store_output(&in0, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in0);
}
void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
@@ -80,7 +80,7 @@ void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
in0 = _mm_srli_si128(sum, 8);
in1 = _mm_add_epi32(sum, in0);
- store_output(&in1, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
}
void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
@@ -149,7 +149,7 @@ void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
in1 = _mm_add_epi32(sum, in0);
in1 = _mm_srai_epi32(in1, 1);
- store_output(&in1, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
}
void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
@@ -221,7 +221,7 @@ void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
in1 = _mm_add_epi32(sum, in0);
in1 = _mm_srai_epi32(in1, 3);
- store_output(&in1, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
}
#define DCT_HIGH_BIT_DEPTH 0