summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
authorJames Zern <jzern@google.com>2014-08-14 18:42:34 -0700
committerGerrit Code Review <gerrit@gerrit.golo.chromium.org>2014-08-14 18:42:34 -0700
commit2ba201fcdb93656867d084d06d6522c820263a5e (patch)
tree2e8b70cfab36d03b2c19251e70fe9ff7081d049b /vp9
parent480693b9951120f32788e7f82bbc6c6079fb12b0 (diff)
parentefdfdf578792b0a9ed93e3b9dc747e1bfe7f5c0c (diff)
downloadlibvpx-2ba201fcdb93656867d084d06d6522c820263a5e.tar
libvpx-2ba201fcdb93656867d084d06d6522c820263a5e.tar.gz
libvpx-2ba201fcdb93656867d084d06d6522c820263a5e.tar.bz2
libvpx-2ba201fcdb93656867d084d06d6522c820263a5e.zip
Merge "32 Align Load bug: in sub_pixel_avg_variance, the parameter sec was also read with an aligned load; changed to an unaligned load."
Diffstat (limited to 'vp9')
-rw-r--r--vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c18
1 files changed, 9 insertions, 9 deletions
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
index 9aa4da962..a441cadaf 100644
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -333,7 +333,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
if (y_offset == 0) {
for (i = 0; i < height ; i++) {
LOAD_SRC_DST
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -347,7 +347,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
for (i = 0; i < height ; i++) {
LOAD_SRC_DST
AVG_NEXT_SRC(src_reg, src_stride)
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -369,7 +369,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
MERGE_NEXT_SRC(src_reg, src_stride)
FILTER_SRC(filter)
src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
MERGE_WITH_SRC(src_reg, zero_reg)
@@ -385,7 +385,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
for (i = 0; i < height ; i++) {
LOAD_SRC_DST
AVG_NEXT_SRC(src_reg, 1)
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -409,7 +409,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
AVG_NEXT_SRC(src_reg, 1)
// average of the previous average and the current average
src_avg = _mm256_avg_epu8(src_avg, src_reg);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_avg = _mm256_avg_epu8(src_avg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -437,7 +437,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
MERGE_WITH_SRC(src_avg, src_reg)
FILTER_SRC(filter)
src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_avg = _mm256_avg_epu8(src_avg, sec_reg);
// expand each byte to 2 bytes
MERGE_WITH_SRC(src_avg, zero_reg)
@@ -459,7 +459,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
MERGE_NEXT_SRC(src_reg, 1)
FILTER_SRC(filter)
src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
MERGE_WITH_SRC(src_reg, zero_reg)
sec+= sec_stride;
@@ -487,7 +487,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
// average of the previous pack and the current one
src_pack = _mm256_avg_epu8(src_pack, src_reg);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_pack = _mm256_avg_epu8(src_pack, sec_reg);
sec+= sec_stride;
MERGE_WITH_SRC(src_pack, zero_reg)
@@ -524,7 +524,7 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
// filter the source
FILTER_SRC(yfilter)
src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_pack = _mm256_avg_epu8(src_pack, sec_reg);
MERGE_WITH_SRC(src_pack, zero_reg)
src_pack = src_reg;