summary | refs | log | tree | commit | diff
path: root/vpx_dsp
diff options
context:
space:
mode:
author: Johann Koenig <johannkoenig@google.com> 2018-09-24 23:11:49 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> 2018-09-24 23:11:49 +0000
commit: 78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d (patch)
tree: 90557f642b1d391d981ece205dcf95cc1dfb6ca1 /vpx_dsp
parent: af2ba81b94e362531af3cb118e406bc7d9115c98 (diff)
parent: 4fa0727fbcf9bb790139fa34b10d610af824a7ef (diff)
download: libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.tar
libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.tar.gz
libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.tar.bz2
libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.zip
Merge "sanitizer: sse2 - fix unaligned double stores"
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- vpx_dsp/x86/loopfilter_sse2.c 24
-rw-r--r-- vpx_dsp/x86/mem_sse2.h 3
2 files changed, 11 insertions, 16 deletions
diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c
index 1a76d670e..853c4d270 100644
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -1627,16 +1627,12 @@ static INLINE void transpose(unsigned char *src[], int in_p,
x5 = _mm_unpacklo_epi16(x2, x3);
// 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
x6 = _mm_unpacklo_epi32(x4, x5);
- _mm_storel_pd((double *)(out + 0 * out_p),
- _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70
- _mm_storeh_pd((double *)(out + 1 * out_p),
- _mm_castsi128_pd(x6)); // 01 11 21 31 41 51 61 71
+ mm_storelu(out + 0 * out_p, x6); // 00 10 20 30 40 50 60 70
+ mm_storehu(out + 1 * out_p, x6); // 01 11 21 31 41 51 61 71
// 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
x7 = _mm_unpackhi_epi32(x4, x5);
- _mm_storel_pd((double *)(out + 2 * out_p),
- _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72
- _mm_storeh_pd((double *)(out + 3 * out_p),
- _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73
+ mm_storelu(out + 2 * out_p, x7); // 02 12 22 32 42 52 62 72
+ mm_storehu(out + 3 * out_p, x7); // 03 13 23 33 43 53 63 73
// 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
x4 = _mm_unpackhi_epi16(x0, x1);
@@ -1644,17 +1640,13 @@ static INLINE void transpose(unsigned char *src[], int in_p,
x5 = _mm_unpackhi_epi16(x2, x3);
// 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
x6 = _mm_unpacklo_epi32(x4, x5);
- _mm_storel_pd((double *)(out + 4 * out_p),
- _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74
- _mm_storeh_pd((double *)(out + 5 * out_p),
- _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75
+ mm_storelu(out + 4 * out_p, x6); // 04 14 24 34 44 54 64 74
+ mm_storehu(out + 5 * out_p, x6); // 05 15 25 35 45 55 65 75
// 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
x7 = _mm_unpackhi_epi32(x4, x5);
- _mm_storel_pd((double *)(out + 6 * out_p),
- _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76
- _mm_storeh_pd((double *)(out + 7 * out_p),
- _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77
+ mm_storelu(out + 6 * out_p, x7); // 06 16 26 36 46 56 66 76
+ mm_storehu(out + 7 * out_p, x7); // 07 17 27 37 47 57 67 77
} while (++idx8x8 < num_8x8_to_transpose);
}
diff --git a/vpx_dsp/x86/mem_sse2.h b/vpx_dsp/x86/mem_sse2.h
index 48dc97970..5209a0628 100644
--- a/vpx_dsp/x86/mem_sse2.h
+++ b/vpx_dsp/x86/mem_sse2.h
@@ -26,6 +26,9 @@ static INLINE uint32_t loadu_uint32(const void *src) {
return v;
}
+#define mm_storelu(dst, v) memcpy((dst), (const char *)&(v), 8)  // copy bytes 0..7 of v to dst; v must be an lvalue because its address is taken
+#define mm_storehu(dst, v) memcpy((dst), (const char *)&(v) + 8, 8)  // copy bytes 8..15 of v to dst; v must be an lvalue because its address is taken
+
static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) {
return _mm_castps_si128(
_mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src));