diff options
author | levytamar82 <tamar.levy@intel.com> | 2014-07-24 00:54:59 -0700 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2014-08-07 14:13:30 -0700 |
commit | af10457e02f608b79db1e2883b7780721cbf715d (patch) | |
tree | 9c09ae441d16ff37070596c3fcf86e1456bb76d1 /vp9/encoder/x86 | |
parent | 65234504b95b4bc9a155539c16e457223d2e6c25 (diff) | |
download | libvpx-af10457e02f608b79db1e2883b7780721cbf715d.tar libvpx-af10457e02f608b79db1e2883b7780721cbf715d.tar.gz libvpx-af10457e02f608b79db1e2883b7780721cbf715d.tar.bz2 libvpx-af10457e02f608b79db1e2883b7780721cbf715d.zip |
Fix bug 806
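In the functions vp9_sad32x32x4d_avx2 and vp9_sad64x64x4d_avx2, the source buffer is aligned to 16 bytes,
not to 32 bytes, so the source rows are now read with the unaligned load (_mm256_loadu_si256) instead of the aligned load (_mm256_load_si256).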
Change-Id: I922fdba56d0936b5cf72e4503519f185645a168c
Diffstat (limited to 'vp9/encoder/x86')
-rw-r--r-- | vp9/encoder/x86/vp9_sad4d_intrin_avx2.c | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c index f31b176e5..1feed6256 100644 --- a/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c +++ b/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c @@ -31,7 +31,7 @@ void vp9_sad32x32x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 32 ; i++) { // load src and all refs - src_reg = _mm256_load_si256((__m256i *)(src)); + src_reg = _mm256_loadu_si256((__m256i *)(src)); ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); @@ -103,8 +103,8 @@ void vp9_sad64x64x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 64 ; i++) { // load 64 bytes from src and all refs - src_reg = _mm256_load_si256((__m256i *)(src)); - srcnext_reg = _mm256_load_si256((__m256i *)(src + 32)); + src_reg = _mm256_loadu_si256((__m256i *)(src)); + srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32)); ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); |