diff options
author | Jingning Han <jingning@google.com> | 2015-03-31 10:08:29 -0700 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2015-03-31 10:21:52 -0700 |
commit | 014fa4529802ec89d1f3239722955c1493b688db (patch) | |
tree | 2c22759f2d9dca324002c1edb099bc5d30522c21 /vp9/encoder/x86 | |
parent | ebe1be9186b4f20e8d30c4bbad79ba78403de0d2 (diff) | |
download | libvpx-014fa4529802ec89d1f3239722955c1493b688db.tar libvpx-014fa4529802ec89d1f3239722955c1493b688db.tar.gz libvpx-014fa4529802ec89d1f3239722955c1493b688db.tar.bz2 libvpx-014fa4529802ec89d1f3239722955c1493b688db.zip |
Use aligned copy in 8x8 Hadamard transform SSE2
This reduces the cycle count of the 8x8 Hadamard transform by 20%.
Change-Id: If34c5e02f3afa42244c6efabe121f7cf5d2df41b
Diffstat (limited to 'vp9/encoder/x86')
-rw-r--r-- | vp9/encoder/x86/vp9_avg_intrin_sse2.c | 16 |
1 file changed, 8 insertions, 8 deletions
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index f2c7c645f..ecd6ce9a2 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -148,21 +148,21 @@ void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
   hadamard_col8_sse2(src, 0);
   hadamard_col8_sse2(src, 1);
 
-  _mm_storeu_si128((__m128i *)coeff, src[0]);
+  _mm_store_si128((__m128i *)coeff, src[0]);
   coeff += 8;
-  _mm_storeu_si128((__m128i *)coeff, src[1]);
+  _mm_store_si128((__m128i *)coeff, src[1]);
   coeff += 8;
-  _mm_storeu_si128((__m128i *)coeff, src[2]);
+  _mm_store_si128((__m128i *)coeff, src[2]);
   coeff += 8;
-  _mm_storeu_si128((__m128i *)coeff, src[3]);
+  _mm_store_si128((__m128i *)coeff, src[3]);
   coeff += 8;
-  _mm_storeu_si128((__m128i *)coeff, src[4]);
+  _mm_store_si128((__m128i *)coeff, src[4]);
   coeff += 8;
-  _mm_storeu_si128((__m128i *)coeff, src[5]);
+  _mm_store_si128((__m128i *)coeff, src[5]);
   coeff += 8;
-  _mm_storeu_si128((__m128i *)coeff, src[6]);
+  _mm_store_si128((__m128i *)coeff, src[6]);
   coeff += 8;
-  _mm_storeu_si128((__m128i *)coeff, src[7]);
+  _mm_store_si128((__m128i *)coeff, src[7]);
 }
 
 void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,