author levytamar82 <tamar.levy@intel.com> 2014-07-24 00:54:59 -0700
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2014-08-07 14:13:30 -0700
commit af10457e02f608b79db1e2883b7780721cbf715d (patch)
tree 9c09ae441d16ff37070596c3fcf86e1456bb76d1 /vp9/encoder/x86
parent 65234504b95b4bc9a155539c16e457223d2e6c25 (diff)
Fix bug 806
In the functions sad32x32x4d and sad64x64x4d the source buffer is aligned to 16 bytes, not 32 bytes, so the 32-byte aligned load could fault; the load is now unaligned.

Change-Id: I922fdba56d0936b5cf72e4503519f185645a168c
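For context, a minimal standalone sketch (not part of the libvpx patch; the buffer name, size, and pointer arithmetic are made up for illustration) of why the change matters: _mm256_load_si256 requires a 32-byte aligned address and can raise a general-protection fault otherwise, while _mm256_loadu_si256 accepts any alignment. The sketch also shows the _mm256_sad_epu8 reduction that the vp9_sad*x*x4d kernels build on. Compile with -mavx2 under any assumptions about the target CPU supporting AVX2.

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Buffer aligned to only 16 bytes, mirroring the src pointer in the bug. */
  _Alignas(16) uint8_t buf[64];
  for (int i = 0; i < 64; i++) buf[i] = (uint8_t)i;

  /* Pick an address that is 16-byte aligned but not 32-byte aligned. */
  const uint8_t *p = ((uintptr_t)buf % 32 == 0) ? buf + 16 : buf;

  /* _mm256_load_si256 on p would demand 32-byte alignment and may fault;
   * the unaligned variant is safe on any address. */
  __m256i v = _mm256_loadu_si256((const __m256i *)p);

  /* Horizontal byte sum via SAD against zero, the same primitive the
   * SAD kernels use to accumulate differences against each reference. */
  __m256i sad = _mm256_sad_epu8(v, _mm256_setzero_si256());
  uint64_t lanes[4];
  _mm256_storeu_si256((__m256i *)lanes, sad);
  printf("sum = %llu\n",
         (unsigned long long)(lanes[0] + lanes[1] + lanes[2] + lanes[3]));
  return 0;
}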
Diffstat (limited to 'vp9/encoder/x86')
-rw-r--r-- vp9/encoder/x86/vp9_sad4d_intrin_avx2.c | 6
1 file changed, 3 insertions, 3 deletions
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
index f31b176e5..1feed6256 100644
--- a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c
@@ -31,7 +31,7 @@ void vp9_sad32x32x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 32 ; i++) {
// load src and all refs
- src_reg = _mm256_load_si256((__m256i *)(src));
+ src_reg = _mm256_loadu_si256((__m256i *)(src));
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
@@ -103,8 +103,8 @@ void vp9_sad64x64x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 64 ; i++) {
// load 64 bytes from src and all refs
- src_reg = _mm256_load_si256((__m256i *)(src));
- srcnext_reg = _mm256_load_si256((__m256i *)(src + 32));
+ src_reg = _mm256_loadu_si256((__m256i *)(src));
+ srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32));
ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));