summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
author: Linfeng Zhang <linfengz@google.com> 2017-05-08 15:41:24 -0700
committer: Linfeng Zhang <linfengz@google.com> 2017-05-08 16:16:52 -0700
commit: ecd1eb216224d3eee9ec060557e9677827dff1db (patch)
tree: 9216c4cb4f76cbe3e8d24ad727784feb15634a5e /vp9
parent: 8053dba5a1470a9ced7236f5b692b16f0d7b8a98 (diff)
download: libvpx-ecd1eb216224d3eee9ec060557e9677827dff1db.tar
libvpx-ecd1eb216224d3eee9ec060557e9677827dff1db.tar.gz
libvpx-ecd1eb216224d3eee9ec060557e9677827dff1db.tar.bz2
libvpx-ecd1eb216224d3eee9ec060557e9677827dff1db.zip
Update 4x4 idct sse2 functions
It's a bit faster to call idct4_sse2() in vpx_idct4x4_16_add_sse2().

Change-Id: I1513be7a895cd2fc190f4a8297c240b17de0f876
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/x86/vp9_idct_intrin_sse2.c | 27
1 file changed, 1 insertion, 26 deletions
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index dcfc454aa..bb2dcf52b 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -16,7 +16,6 @@
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
__m128i in[2];
- const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
in[0] = load_input_data(input);
@@ -49,31 +48,7 @@ void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[0] = _mm_srai_epi16(in[0], 4);
in[1] = _mm_srai_epi16(in[1], 4);
- // Reconstruction and Store
- {
- __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
- __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
- d0 = _mm_unpacklo_epi32(d0,
- _mm_cvtsi32_si128(*(const int *)(dest + stride)));
- d2 = _mm_unpacklo_epi32(
- d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)));
- d0 = _mm_unpacklo_epi8(d0, zero);
- d2 = _mm_unpacklo_epi8(d2, zero);
- d0 = _mm_add_epi16(d0, in[0]);
- d2 = _mm_add_epi16(d2, in[1]);
- d0 = _mm_packus_epi16(d0, d2);
- // store result[0]
- *(int *)dest = _mm_cvtsi128_si32(d0);
- // store result[1]
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
- // store result[2]
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
- // store result[3]
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
- }
+ recon_and_store4x4_sse2(in, dest, stride);
}
void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,