summaryrefslogtreecommitdiff
path: root/vp9/encoder/x86
diff options
context:
space:
mode:
author: Jingning Han <jingning@google.com>, 2015-03-31 10:08:29 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org>, 2015-03-31 10:21:52 -0700
commit: 014fa4529802ec89d1f3239722955c1493b688db (patch)
tree: 2c22759f2d9dca324002c1edb099bc5d30522c21 /vp9/encoder/x86
parent: ebe1be9186b4f20e8d30c4bbad79ba78403de0d2 (diff)
download: libvpx-014fa4529802ec89d1f3239722955c1493b688db.tar
libvpx-014fa4529802ec89d1f3239722955c1493b688db.tar.gz
libvpx-014fa4529802ec89d1f3239722955c1493b688db.tar.bz2
libvpx-014fa4529802ec89d1f3239722955c1493b688db.zip
Use aligned copy in 8x8 Hadamard transform SSE2
This reduces the cycle count of the 8x8 Hadamard transform by 20%.

Change-Id: If34c5e02f3afa42244c6efabe121f7cf5d2df41b
Diffstat (limited to 'vp9/encoder/x86')
-rw-r--r-- vp9/encoder/x86/vp9_avg_intrin_sse2.c | 16
1 file changed, 8 insertions, 8 deletions
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index f2c7c645f..ecd6ce9a2 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -148,21 +148,21 @@ void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
hadamard_col8_sse2(src, 0);
hadamard_col8_sse2(src, 1);
- _mm_storeu_si128((__m128i *)coeff, src[0]);
+ _mm_store_si128((__m128i *)coeff, src[0]);
coeff += 8;
- _mm_storeu_si128((__m128i *)coeff, src[1]);
+ _mm_store_si128((__m128i *)coeff, src[1]);
coeff += 8;
- _mm_storeu_si128((__m128i *)coeff, src[2]);
+ _mm_store_si128((__m128i *)coeff, src[2]);
coeff += 8;
- _mm_storeu_si128((__m128i *)coeff, src[3]);
+ _mm_store_si128((__m128i *)coeff, src[3]);
coeff += 8;
- _mm_storeu_si128((__m128i *)coeff, src[4]);
+ _mm_store_si128((__m128i *)coeff, src[4]);
coeff += 8;
- _mm_storeu_si128((__m128i *)coeff, src[5]);
+ _mm_store_si128((__m128i *)coeff, src[5]);
coeff += 8;
- _mm_storeu_si128((__m128i *)coeff, src[6]);
+ _mm_store_si128((__m128i *)coeff, src[6]);
coeff += 8;
- _mm_storeu_si128((__m128i *)coeff, src[7]);
+ _mm_store_si128((__m128i *)coeff, src[7]);
}
void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,